1use lazy_static::lazy_static;
22use regex::Regex;
23use serde::{Deserialize, Serialize};
24use std::collections::{HashMap, HashSet, VecDeque};
25
26use crate::types::{CfgInfo, RefType, VarRef};
27use crate::Language;
28use crate::TldrError;
29
// Iteration cap for the taint analysis. The code that consumes this constant
// is not part of this excerpt — presumably it bounds the fixpoint loop
// (TODO confirm against the caller).
const MAX_TAINT_ITERATIONS: usize = 1000;
37
/// Category of a detected taint source.
///
/// Serialized with `snake_case` variant names (e.g. `user_input`).
/// The examples on each variant are taken from the Python pattern table.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TaintSourceType {
    /// Interactive input, e.g. `input(`.
    UserInput,
    /// Standard input, e.g. `sys.stdin`.
    Stdin,
    /// HTTP request parameters, e.g. `request.args`.
    HttpParam,
    /// HTTP request body, e.g. `request.get_json(`.
    HttpBody,
    /// Environment variables, e.g. `os.environ` / `os.getenv`.
    EnvVar,
    /// Data read from a file, e.g. `.read(`.
    FileRead,
}
61
/// Category of a detected taint sink.
///
/// Serialized with `snake_case` variant names (e.g. `sql_query`).
/// The examples on each variant are taken from the Python pattern table.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TaintSinkType {
    /// SQL execution, e.g. `.execute(`.
    SqlQuery,
    /// Dynamic evaluation, e.g. `eval(`.
    CodeEval,
    /// Dynamic execution, e.g. `exec(`.
    CodeExec,
    /// Dynamic compilation, e.g. `compile(`.
    CodeCompile,
    /// Shell / process execution, e.g. `subprocess.run(`, `os.system(`.
    ShellExec,
    /// Write-like sinks, e.g. `.write(`. Some language tables also reuse this
    /// variant for other output sinks such as `.innerHTML =` or `strcpy(`.
    FileWrite,
}
81
/// Category of a detected sanitizer call.
///
/// Serialized with `snake_case` variant names.
/// The examples on each variant are taken from the Python pattern table.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SanitizerType {
    /// Numeric conversion, e.g. `int(`, `float(`.
    Numeric,
    /// Shell quoting, e.g. `shlex.quote(`.
    Shell,
    /// HTML escaping, e.g. `html.escape(`.
    Html,
}
95
/// A place where tainted data enters, as reported by `detect_sources`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TaintSource {
    /// Name of the variable the source was attributed to.
    pub var: String,
    /// Line number supplied by the caller of the detector.
    pub line: u32,
    /// Which kind of source pattern matched.
    pub source_type: TaintSourceType,
    /// Text of the statement that matched; left out of the serialized output
    /// when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub statement: Option<String>,
}
113
/// A potentially dangerous operation, as reported by `detect_sinks`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TaintSink {
    /// Name of the variable that reaches the sink.
    pub var: String,
    /// Line number supplied by the caller of the detector.
    pub line: u32,
    /// Which kind of sink pattern matched.
    pub sink_type: TaintSinkType,
    /// Whether `var` is tainted at this sink. `detect_sinks` always creates
    /// sinks with `false`; whatever sets it to `true` is outside this excerpt.
    pub tainted: bool,
    /// Text of the statement that matched; left out of the serialized output
    /// when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub statement: Option<String>,
}
129
/// A source paired with a sink it reaches.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TaintFlow {
    /// Where the tainted data originated.
    pub source: TaintSource,
    /// Where the tainted data ends up.
    pub sink: TaintSink,
    /// NOTE(review): presumably the CFG block ids traversed from source to
    /// sink — the code that fills this is not visible here; confirm.
    pub path: Vec<usize>,
}
140
/// Taint analysis results for a single function.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TaintInfo {
    /// Name of the analyzed function.
    pub function_name: String,
    /// Tainted variable names, keyed by the block id that `is_tainted` looks up.
    pub tainted_vars: HashMap<usize, HashSet<String>>,
    /// Detected taint sources.
    pub sources: Vec<TaintSource>,
    /// Detected sinks; `get_vulnerabilities` returns those flagged `tainted`.
    pub sinks: Vec<TaintSink>,
    /// Source-to-sink flows.
    pub flows: Vec<TaintFlow>,
    /// Names of variables recorded as sanitized.
    pub sanitized_vars: HashSet<String>,
    /// Optional convergence note. Defaults to `None` when missing on
    /// deserialization and is left out of the serialized output when `None`.
    /// NOTE(review): the values written here are set outside this excerpt.
    #[serde(default = "default_convergence")]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub convergence: Option<String>,
}
165
/// Serde default for `TaintInfo::convergence`: no convergence note.
fn default_convergence() -> Option<String> {
    Option::default()
}
169
170impl TaintInfo {
175 pub fn new(function_name: impl Into<String>) -> Self {
177 Self {
178 function_name: function_name.into(),
179 tainted_vars: HashMap::new(),
180 sources: Vec::new(),
181 sinks: Vec::new(),
182 flows: Vec::new(),
183 sanitized_vars: HashSet::new(),
184 convergence: None,
185 }
186 }
187
188 pub fn is_tainted(&self, block_id: usize, var: &str) -> bool {
192 self.tainted_vars
193 .get(&block_id)
194 .map(|vars| vars.contains(var))
195 .unwrap_or(false)
196 }
197
198 pub fn get_vulnerabilities(&self) -> Vec<&TaintSink> {
200 self.sinks.iter().filter(|s| s.tainted).collect()
201 }
202}
203
204pub fn build_predecessors(cfg: &CfgInfo) -> HashMap<usize, Vec<usize>> {
213 let mut preds: HashMap<usize, Vec<usize>> = HashMap::new();
214
215 for block in &cfg.blocks {
217 preds.entry(block.id).or_default();
218 }
219
220 for edge in &cfg.edges {
222 preds.entry(edge.to).or_default().push(edge.from);
223 }
224
225 preds
226}
227
228pub fn build_successors(cfg: &CfgInfo) -> HashMap<usize, Vec<usize>> {
233 let mut succs: HashMap<usize, Vec<usize>> = HashMap::new();
234
235 for block in &cfg.blocks {
237 succs.entry(block.id).or_default();
238 }
239
240 for edge in &cfg.edges {
242 succs.entry(edge.from).or_default().push(edge.to);
243 }
244
245 succs
246}
247
248pub fn build_line_to_block(cfg: &CfgInfo) -> HashMap<u32, usize> {
257 let mut mapping: HashMap<u32, usize> = HashMap::new();
258
259 let mut all_lines: HashSet<u32> = HashSet::new();
262 for block in &cfg.blocks {
263 for line in block.lines.0..=block.lines.1 {
264 all_lines.insert(line);
265 }
266 }
267
268 for line in all_lines {
269 let mut best_block: Option<(usize, u32)> = None; for block in &cfg.blocks {
272 let (start, end) = block.lines;
273 if line >= start && line <= end {
274 let size = end - start + 1;
275 if best_block.is_none()
278 || size > best_block.unwrap().1
279 || (size == best_block.unwrap().1 && block.id > best_block.unwrap().0)
280 {
281 best_block = Some((block.id, size));
282 }
283 }
284 }
285
286 if let Some((block_id, _)) = best_block {
287 mapping.insert(line, block_id);
288 }
289 }
290
291 mapping
292}
293
294pub fn build_refs_by_block<'a>(
300 refs: &'a [VarRef],
301 line_to_block: &HashMap<u32, usize>,
302) -> HashMap<usize, Vec<&'a VarRef>> {
303 let mut by_block: HashMap<usize, Vec<&VarRef>> = HashMap::new();
304
305 for var_ref in refs {
306 if let Some(&block_id) = line_to_block.get(&var_ref.line) {
307 by_block.entry(block_id).or_default().push(var_ref);
308 }
309 }
310
311 for refs in by_block.values_mut() {
313 refs.sort_by_key(|r| r.line);
314 }
315
316 by_block
317}
318
319pub fn validate_cfg(cfg: &CfgInfo) -> Result<(), TldrError> {
330 if cfg.blocks.is_empty() {
332 return Err(TldrError::InvalidArgs {
333 arg: "cfg".to_string(),
334 message: "Empty CFG".to_string(),
335 suggestion: None,
336 });
337 }
338
339 let block_ids: HashSet<usize> = cfg.blocks.iter().map(|b| b.id).collect();
341
342 if !block_ids.contains(&cfg.entry_block) {
344 return Err(TldrError::InvalidArgs {
345 arg: "cfg".to_string(),
346 message: format!("Entry block {} not in blocks", cfg.entry_block),
347 suggestion: Some(format!(
348 "Valid block IDs are: {:?}",
349 block_ids.iter().collect::<Vec<_>>()
350 )),
351 });
352 }
353
354 for edge in &cfg.edges {
356 if !block_ids.contains(&edge.from) {
357 return Err(TldrError::InvalidArgs {
358 arg: "cfg".to_string(),
359 message: format!(
360 "Edge references invalid source block: {} -> {}",
361 edge.from, edge.to
362 ),
363 suggestion: Some(format!(
364 "Valid block IDs are: {:?}",
365 block_ids.iter().collect::<Vec<_>>()
366 )),
367 });
368 }
369 if !block_ids.contains(&edge.to) {
370 return Err(TldrError::InvalidArgs {
371 arg: "cfg".to_string(),
372 message: format!(
373 "Edge references invalid target block: {} -> {}",
374 edge.from, edge.to
375 ),
376 suggestion: Some(format!(
377 "Valid block IDs are: {:?}",
378 block_ids.iter().collect::<Vec<_>>()
379 )),
380 });
381 }
382 }
383
384 Ok(())
385}
386
/// Per-language regex tables used by the source, sink and sanitizer detectors.
///
/// Each entry pairs a compiled pattern with the category reported when the
/// pattern matches a statement.
pub struct LanguagePatterns {
    /// Patterns that mark a statement as a taint source.
    pub sources: Vec<(Regex, TaintSourceType)>,
    /// Patterns that mark a statement as a taint sink.
    pub sinks: Vec<(Regex, TaintSinkType)>,
    /// Patterns that mark a statement as a sanitizer call.
    pub sanitizers: Vec<(Regex, SanitizerType)>,
}
403
// Python pattern table, selected by `get_patterns` for `Language::Python`.
// The regexes are built on first access; `unwrap()` would panic there if a
// literal failed to compile.
lazy_static! {
    static ref PYTHON_PATTERNS: LanguagePatterns = LanguagePatterns {
        // Entry points for untrusted data: input(), Flask-style `request.*`,
        // stdin, environment and file reads.
        sources: vec![
            (Regex::new(r"\binput\s*\(").unwrap(), TaintSourceType::UserInput),
            (Regex::new(r"request\.(args|form|json|data|values|cookies|headers)").unwrap(), TaintSourceType::HttpParam),
            (Regex::new(r"request\.get_json\s*\(").unwrap(), TaintSourceType::HttpBody),
            (Regex::new(r"sys\.stdin").unwrap(), TaintSourceType::Stdin),
            (Regex::new(r"os\.(environ|getenv)").unwrap(), TaintSourceType::EnvVar),
            (Regex::new(r"\.(read|readlines|readline)\s*\(").unwrap(), TaintSourceType::FileRead),
        ],
        // Dangerous operations: SQL execution, eval/exec/compile, process
        // spawning and file writes.
        sinks: vec![
            (Regex::new(r"\.(execute|executemany)\s*\(").unwrap(), TaintSinkType::SqlQuery),
            (Regex::new(r"\beval\s*\(").unwrap(), TaintSinkType::CodeEval),
            (Regex::new(r"\bexec\s*\(").unwrap(), TaintSinkType::CodeExec),
            (Regex::new(r"\bcompile\s*\(").unwrap(), TaintSinkType::CodeCompile),
            (Regex::new(r"subprocess\.(run|call|Popen|check_output)\s*\(").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"os\.(system|popen|spawn\w*)\s*\(").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"\.write\s*\(").unwrap(), TaintSinkType::FileWrite),
        ],
        // Calls treated as sanitizing: numeric casts, shell quoting and HTML
        // escaping.
        sanitizers: vec![
            (Regex::new(r"\b(int|float|bool)\s*\(").unwrap(), SanitizerType::Numeric),
            (Regex::new(r"(shlex|pipes)\.quote\s*\(").unwrap(), SanitizerType::Shell),
            (Regex::new(r"(html|markupsafe|cgi)\.escape\s*\(").unwrap(), SanitizerType::Html),
        ],
    };
}
447
// TypeScript pattern table; `get_patterns` also returns it for JavaScript.
lazy_static! {
    static ref TYPESCRIPT_PATTERNS: LanguagePatterns = LanguagePatterns {
        // Entry points: Express-style `req.*`, process env/stdin, readline and
        // file reads.
        sources: vec![
            (Regex::new(r"req\.body").unwrap(), TaintSourceType::HttpBody),
            (Regex::new(r"req\.(params|query|cookies|headers)").unwrap(), TaintSourceType::HttpParam),
            (Regex::new(r"process\.env").unwrap(), TaintSourceType::EnvVar),
            (Regex::new(r"process\.stdin").unwrap(), TaintSourceType::Stdin),
            (Regex::new(r"readline\s*\(").unwrap(), TaintSourceType::UserInput),
            (Regex::new(r"\.(read|readFile)\s*\(").unwrap(), TaintSourceType::FileRead),
        ],
        // Dangerous operations. DOM writes (`innerHTML`, `document.write`) are
        // reported under the `FileWrite` category.
        sinks: vec![
            (Regex::new(r"\beval\s*\(").unwrap(), TaintSinkType::CodeEval),
            (Regex::new(r"new\s+Function\s*\(").unwrap(), TaintSinkType::CodeEval),
            (Regex::new(r"child_process\.(exec|spawn|execSync|execFile)\s*\(").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"\bexecSync\s*\(").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"\.innerHTML\s*=").unwrap(), TaintSinkType::FileWrite),
            (Regex::new(r"document\.write\s*\(").unwrap(), TaintSinkType::FileWrite),
            (Regex::new(r"\.(query|execute)\s*\(").unwrap(), TaintSinkType::SqlQuery),
        ],
        // Numeric parsing and HTML/URL encoding helpers.
        sanitizers: vec![
            (Regex::new(r"\b(parseInt|Number|parseFloat)\s*\(").unwrap(), SanitizerType::Numeric),
            (Regex::new(r"(encodeURIComponent|DOMPurify\.sanitize)\s*\(").unwrap(), SanitizerType::Html),
        ],
    };
}
489
// Go pattern table, selected by `get_patterns` for `Language::Go`.
lazy_static! {
    static ref GO_PATTERNS: LanguagePatterns = LanguagePatterns {
        // Entry points: fmt/bufio readers, `r.*` request accessors (the request
        // variable is matched literally as `r`), env, stdin and file reads.
        sources: vec![
            (Regex::new(r"(fmt\.Scan|bufio\.NewReader|bufio\.NewScanner)").unwrap(), TaintSourceType::UserInput),
            (Regex::new(r"(r\.(FormValue|PostFormValue|URL\.Query)\s*\(|\.Query\(\))").unwrap(), TaintSourceType::HttpParam),
            (Regex::new(r"(r\.Body|\.ReadAll\(r\.Body\))").unwrap(), TaintSourceType::HttpBody),
            (Regex::new(r"os\.Getenv\s*\(").unwrap(), TaintSourceType::EnvVar),
            (Regex::new(r"os\.Stdin").unwrap(), TaintSourceType::Stdin),
            (Regex::new(r"(os\.Open|ioutil\.ReadFile)\s*\(").unwrap(), TaintSourceType::FileRead),
        ],
        // Dangerous operations: process execution, `db.*` SQL calls, and
        // template/formatted output (reported as `FileWrite`).
        sinks: vec![
            (Regex::new(r"exec\.Command\s*\(").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"db\.(Exec|Query|QueryRow)\s*\(").unwrap(), TaintSinkType::SqlQuery),
            (Regex::new(r"(template\.HTML\s*\(|fmt\.Fprintf\s*\()").unwrap(), TaintSinkType::FileWrite),
        ],
        // strconv numeric parsing and HTML/URL escaping.
        sanitizers: vec![
            (Regex::new(r"strconv\.(Atoi|ParseInt|ParseFloat)\s*\(").unwrap(), SanitizerType::Numeric),
            (Regex::new(r"(html\.EscapeString|url\.QueryEscape)\s*\(").unwrap(), SanitizerType::Html),
        ],
    };
}
523
// Java pattern table, selected by `get_patterns` for `Language::Java`.
lazy_static! {
    static ref JAVA_PATTERNS: LanguagePatterns = LanguagePatterns {
        // Entry points: Scanner on System.in, buffered readers, servlet request
        // parameters, env and file readers.
        sources: vec![
            (Regex::new(r"new\s+Scanner\s*\(System\.in\)").unwrap(), TaintSourceType::Stdin),
            (Regex::new(r"(readLine\s*\(|new\s+BufferedReader\s*\()").unwrap(), TaintSourceType::UserInput),
            (Regex::new(r"(request\.getParameter\s*\(|getQueryString\s*\()").unwrap(), TaintSourceType::HttpParam),
            (Regex::new(r"System\.getenv\s*\(").unwrap(), TaintSourceType::EnvVar),
            (Regex::new(r"(new\s+FileReader|Files\.readAllLines)").unwrap(), TaintSourceType::FileRead),
        ],
        // Dangerous operations: process execution, JDBC-style execute calls and
        // reflective class loading (reported as `CodeEval`).
        sinks: vec![
            (Regex::new(r"(Runtime\.getRuntime\(\)\.exec\s*\(|ProcessBuilder\s*\()").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"\.(execute|executeQuery|executeUpdate)\s*\(").unwrap(), TaintSinkType::SqlQuery),
            (Regex::new(r"Class\.forName\s*\(").unwrap(), TaintSinkType::CodeEval),
        ],
        // Numeric parsing and HTML encoding helpers.
        sanitizers: vec![
            (Regex::new(r"(Integer\.parseInt|Long\.parseLong|Double\.parseDouble)\s*\(").unwrap(), SanitizerType::Numeric),
            (Regex::new(r"(ESAPI\.encoder\s*\(|StringEscapeUtils\.escapeHtml)").unwrap(), SanitizerType::Html),
        ],
    };
}
555
// Rust pattern table, selected by `get_patterns` for `Language::Rust`.
lazy_static! {
    static ref RUST_PATTERNS: LanguagePatterns = LanguagePatterns {
        // Entry points: stdin, environment variables, CLI args and file reads.
        // The `std::` prefix is optional in these patterns.
        sources: vec![
            (Regex::new(r"(std::)?io::stdin\s*\(").unwrap(), TaintSourceType::Stdin),
            (Regex::new(r"(std::)?env::var\s*\(").unwrap(), TaintSourceType::EnvVar),
            (Regex::new(r"(std::)?env::args\s*\(").unwrap(), TaintSourceType::UserInput),
            (Regex::new(r"((std::)?fs::read_to_string\s*\(|File::open)").unwrap(), TaintSourceType::FileRead),
        ],
        // Dangerous operations: process spawning, `unsafe` blocks (reported as
        // `CodeEval`) and raw pointer read/write (reported as `FileWrite`).
        sinks: vec![
            (Regex::new(r"(Command::new\s*\(|std::process::Command)").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"\bunsafe\s*\{").unwrap(), TaintSinkType::CodeEval),
            (Regex::new(r"std::ptr::(write|read)\s*\(").unwrap(), TaintSinkType::FileWrite),
        ],
        // Turbofish numeric parses such as `.parse::<i32>()`.
        sanitizers: vec![
            (Regex::new(r"\.parse::<(i32|i64|u32|u64|f32|f64|usize|isize)>\s*\(").unwrap(), SanitizerType::Numeric),
        ],
    };
}
583
// C pattern table, selected by `get_patterns` for `Language::C`.
lazy_static! {
    static ref C_PATTERNS: LanguagePatterns = LanguagePatterns {
        // Entry points: scanf family, line/char readers, getenv, file reads
        // and socket receives.
        sources: vec![
            (Regex::new(r"\b(scanf|fscanf|sscanf)\s*\(").unwrap(), TaintSourceType::UserInput),
            (Regex::new(r"\b(fgets|gets|getchar)\s*\(").unwrap(), TaintSourceType::UserInput),
            (Regex::new(r"\bgetenv\s*\(").unwrap(), TaintSourceType::EnvVar),
            (Regex::new(r"\b(fread|fopen)\s*\(").unwrap(), TaintSourceType::FileRead),
            (Regex::new(r"\b(recv|recvfrom)\s*\(").unwrap(), TaintSourceType::UserInput),
        ],
        // Dangerous operations: process execution, unbounded formatting
        // (`sprintf`/`vsprintf`, reported as `ShellExec`) and string copies
        // (reported as `FileWrite`).
        sinks: vec![
            (Regex::new(r"\b(system|popen|execl|execv|execvp)\s*\(").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"\b(sprintf|vsprintf)\s*\(").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"\b(strcpy|strcat|strncpy)\s*\(").unwrap(), TaintSinkType::FileWrite),
        ],
        // Numeric conversions; `snprintf` is classified as a `Shell` sanitizer.
        sanitizers: vec![
            (Regex::new(r"\b(atoi|atol|atof|strtol|strtoul|strtod)\s*\(").unwrap(), SanitizerType::Numeric),
            (Regex::new(r"\bsnprintf\s*\(").unwrap(), SanitizerType::Shell),
        ],
    };
}
615
// C++ pattern table, selected by `get_patterns` for `Language::Cpp`.
lazy_static! {
    static ref CPP_PATTERNS: LanguagePatterns = LanguagePatterns {
        // Entry points: `std::cin >>`, getline, getenv and file streams.
        sources: vec![
            (Regex::new(r"std::cin\s*>>").unwrap(), TaintSourceType::UserInput),
            (Regex::new(r"(std::)?getline\s*\(").unwrap(), TaintSourceType::UserInput),
            (Regex::new(r"\bgetenv\s*\(").unwrap(), TaintSourceType::EnvVar),
            (Regex::new(r"std::(ifstream|fstream)").unwrap(), TaintSourceType::FileRead),
        ],
        // Dangerous operations: process execution and `sprintf` (reported as
        // `ShellExec`).
        sinks: vec![
            (Regex::new(r"(\bsystem\s*\(|\bpopen\s*\(|std::system\s*\()").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"\bsprintf\s*\(").unwrap(), TaintSinkType::ShellExec),
        ],
        // `std::sto*` conversions and numeric `static_cast`s.
        sanitizers: vec![
            (Regex::new(r"std::sto(i|l|ul|ll|f|d)\s*\(").unwrap(), SanitizerType::Numeric),
            (Regex::new(r"static_cast<(int|long|float|double)>\s*\(").unwrap(), SanitizerType::Numeric),
        ],
    };
}
643
// Ruby pattern table, selected by `get_patterns` for `Language::Ruby`.
lazy_static! {
    static ref RUBY_PATTERNS: LanguagePatterns = LanguagePatterns {
        // Entry points: bare `gets`, STDIN readers, `params[`, `ENV[` and file
        // reads. `gets` is matched as a whole word with no parenthesis needed.
        sources: vec![
            (Regex::new(r"\bgets\b").unwrap(), TaintSourceType::UserInput),
            (Regex::new(r"STDIN\.(read|gets|readline)").unwrap(), TaintSourceType::Stdin),
            (Regex::new(r"\bparams\[").unwrap(), TaintSourceType::HttpParam),
            (Regex::new(r"ENV\[").unwrap(), TaintSourceType::EnvVar),
            (Regex::new(r"File\.(read|open)\s*\(").unwrap(), TaintSourceType::FileRead),
        ],
        // Dangerous operations: eval, process execution and dynamic dispatch
        // through `.send(` (reported as `CodeEval`).
        sinks: vec![
            (Regex::new(r"\beval\s*\(").unwrap(), TaintSinkType::CodeEval),
            (Regex::new(r"\b(system|exec)\s*\(").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"IO\.popen\s*\(").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"\.send\s*\(").unwrap(), TaintSinkType::CodeEval),
        ],
        // `.to_i` / `.to_f` conversions and HTML escaping helpers.
        sanitizers: vec![
            (Regex::new(r"\.(to_i|to_f)\b").unwrap(), SanitizerType::Numeric),
            (Regex::new(r"(CGI\.escapeHTML|Rack::Utils\.escape_html)\s*\(").unwrap(), SanitizerType::Html),
        ],
    };
}
677
// Kotlin pattern table, selected by `get_patterns` for `Language::Kotlin`.
lazy_static! {
    static ref KOTLIN_PATTERNS: LanguagePatterns = LanguagePatterns {
        // Entry points: readLine()/readln(), env, buffered readers and servlet
        // request parameters.
        sources: vec![
            (Regex::new(r"\b(readLine|readln)\s*\(\)").unwrap(), TaintSourceType::UserInput),
            (Regex::new(r"System\.getenv\s*\(").unwrap(), TaintSourceType::EnvVar),
            (Regex::new(r"BufferedReader\s*\(").unwrap(), TaintSourceType::UserInput),
            (Regex::new(r"request\.getParameter\s*\(").unwrap(), TaintSourceType::HttpParam),
        ],
        // Dangerous operations: process execution and SQL statement calls.
        sinks: vec![
            (Regex::new(r"(Runtime\.getRuntime\(\)\.exec\s*\(|ProcessBuilder\s*\()").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"\.(execute|executeQuery)\s*\(|prepareStatement\s*\(").unwrap(), TaintSinkType::SqlQuery),
        ],
        // Zero-argument numeric conversions such as `.toInt()`.
        sanitizers: vec![
            (Regex::new(r"\.(toInt|toLong|toDouble|toFloat)\s*\(\)").unwrap(), SanitizerType::Numeric),
        ],
    };
}
703
// Swift pattern table, selected by `get_patterns` for `Language::Swift`.
lazy_static! {
    static ref SWIFT_PATTERNS: LanguagePatterns = LanguagePatterns {
        // Entry points: readLine(), process environment lookups, and
        // FileManager/URLSession usage (both reported as `FileRead`).
        sources: vec![
            (Regex::new(r"\breadLine\s*\(\)").unwrap(), TaintSourceType::UserInput),
            (Regex::new(r"ProcessInfo\.processInfo\.environment\[").unwrap(), TaintSourceType::EnvVar),
            (Regex::new(r"(FileManager\.default|URLSession)").unwrap(), TaintSourceType::FileRead),
        ],
        // Dangerous operations: process launching and raw sqlite3 execution.
        sinks: vec![
            (Regex::new(r"(Process\s*\(\)|NSTask)").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"sqlite3_exec\s*\(").unwrap(), TaintSinkType::SqlQuery),
        ],
        // Numeric initializers and percent-encoding.
        sanitizers: vec![
            (Regex::new(r"\b(Int|Double|Float)\s*\(").unwrap(), SanitizerType::Numeric),
            (Regex::new(r"addingPercentEncoding\s*\(").unwrap(), SanitizerType::Html),
        ],
    };
}
729
// C# pattern table, selected by `get_patterns` for `Language::CSharp`.
lazy_static! {
    static ref CSHARP_PATTERNS: LanguagePatterns = LanguagePatterns {
        // Entry points: console input, ASP.NET-style request collections,
        // environment variables and file/stream readers.
        sources: vec![
            (Regex::new(r"Console\.ReadLine\s*\(").unwrap(), TaintSourceType::UserInput),
            (Regex::new(r"Request\.(QueryString|Form)\[").unwrap(), TaintSourceType::HttpParam),
            (Regex::new(r"Environment\.GetEnvironmentVariable\s*\(").unwrap(), TaintSourceType::EnvVar),
            (Regex::new(r"(File\.(ReadAllText|ReadAllLines|OpenRead)\s*\(|StreamReader\s*\()").unwrap(), TaintSourceType::FileRead),
        ],
        // Dangerous operations: process start, SQL commands and reflective
        // instantiation (reported as `CodeEval`).
        sinks: vec![
            (Regex::new(r"Process\.Start\s*\(").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"(SqlCommand\s*\(|\.ExecuteNonQuery\s*\(|\.ExecuteReader\s*\()").unwrap(), TaintSinkType::SqlQuery),
            (Regex::new(r"Activator\.CreateInstance\s*\(").unwrap(), TaintSinkType::CodeEval),
        ],
        // Numeric parsing and HTML encoding.
        sanitizers: vec![
            (Regex::new(r"(int\.Parse|Convert\.ToInt32|double\.Parse)\s*\(").unwrap(), SanitizerType::Numeric),
            (Regex::new(r"HttpUtility\.HtmlEncode\s*\(").unwrap(), SanitizerType::Html),
        ],
    };
}
759
// Scala pattern table, selected by `get_patterns` for `Language::Scala`.
lazy_static! {
    static ref SCALA_PATTERNS: LanguagePatterns = LanguagePatterns {
        // Entry points: StdIn, environment variables and `Source.fromFile`.
        sources: vec![
            (Regex::new(r"(StdIn\.readLine\s*\(|scala\.io\.StdIn)").unwrap(), TaintSourceType::UserInput),
            (Regex::new(r"System\.getenv\s*\(").unwrap(), TaintSourceType::EnvVar),
            (Regex::new(r"Source\.fromFile\s*\(").unwrap(), TaintSourceType::FileRead),
        ],
        // Dangerous operations: process execution (including any mention of
        // `sys.process`) and SQL execute calls.
        sinks: vec![
            (Regex::new(r"(Runtime\.getRuntime\.exec\s*\(|sys\.process|Process\s*\()").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"\.(execute|executeQuery)\s*\(").unwrap(), TaintSinkType::SqlQuery),
        ],
        // Parenthesis-free numeric conversions (`.toInt`) and HTML escaping.
        sanitizers: vec![
            (Regex::new(r"\.(toInt|toLong|toDouble)\b").unwrap(), SanitizerType::Numeric),
            (Regex::new(r"StringEscapeUtils\.escapeHtml").unwrap(), SanitizerType::Html),
        ],
    };
}
785
// PHP pattern table, selected by `get_patterns` for `Language::Php`.
lazy_static! {
    static ref PHP_PATTERNS: LanguagePatterns = LanguagePatterns {
        // Entry points: superglobals (`$_POST` is reported as `HttpBody`, the
        // others as `HttpParam`), fgets, file_get_contents and environment.
        sources: vec![
            (Regex::new(r"\$_(GET|REQUEST|COOKIE|SERVER)\[").unwrap(), TaintSourceType::HttpParam),
            (Regex::new(r"\$_POST\[").unwrap(), TaintSourceType::HttpBody),
            (Regex::new(r"\bfgets\s*\(").unwrap(), TaintSourceType::UserInput),
            (Regex::new(r"file_get_contents\s*\(").unwrap(), TaintSourceType::FileRead),
            (Regex::new(r"(getenv\s*\(|\$_ENV\[)").unwrap(), TaintSourceType::EnvVar),
        ],
        // Dangerous operations: eval, process execution and SQL queries.
        sinks: vec![
            (Regex::new(r"\beval\s*\(").unwrap(), TaintSinkType::CodeEval),
            (Regex::new(r"\b(exec|system|passthru|shell_exec|popen|proc_open)\s*\(").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"(mysqli_query\s*\(|->query\s*\()").unwrap(), TaintSinkType::SqlQuery),
        ],
        // Numeric casts, HTML escaping, and `mysqli_real_escape_string`
        // (classified under `Shell`).
        sanitizers: vec![
            (Regex::new(r"(\b(intval|floatval)\s*\(|\(int\)|\(float\))").unwrap(), SanitizerType::Numeric),
            (Regex::new(r"(htmlspecialchars|htmlentities)\s*\(").unwrap(), SanitizerType::Html),
            (Regex::new(r"mysqli_real_escape_string\s*\(").unwrap(), SanitizerType::Shell),
        ],
    };
}
819
// Lua pattern table; `get_patterns` also returns it for Luau.
lazy_static! {
    static ref LUA_PATTERNS: LanguagePatterns = LanguagePatterns {
        // Entry points: io.read, os.getenv and io.open.
        sources: vec![
            (Regex::new(r"io\.read\s*\(").unwrap(), TaintSourceType::UserInput),
            (Regex::new(r"os\.getenv\s*\(").unwrap(), TaintSourceType::EnvVar),
            (Regex::new(r"io\.open\s*\(").unwrap(), TaintSourceType::FileRead),
        ],
        // Dangerous operations: process execution and dynamic code loading.
        sinks: vec![
            (Regex::new(r"os\.execute\s*\(").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"io\.popen\s*\(").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"\b(loadstring|load|dofile|loadfile)\s*\(").unwrap(), TaintSinkType::CodeEval),
        ],
        // `tonumber(` conversion.
        sanitizers: vec![
            (Regex::new(r"\btonumber\s*\(").unwrap(), SanitizerType::Numeric),
        ],
    };
}
845
// Elixir pattern table, selected by `get_patterns` for `Language::Elixir`.
lazy_static! {
    static ref ELIXIR_PATTERNS: LanguagePatterns = LanguagePatterns {
        // Entry points: IO.gets, System.get_env and File.read / File.read!.
        sources: vec![
            (Regex::new(r"IO\.gets\s*\(").unwrap(), TaintSourceType::UserInput),
            (Regex::new(r"System\.get_env\s*\(").unwrap(), TaintSourceType::EnvVar),
            (Regex::new(r"File\.(read|read!)\s*\(").unwrap(), TaintSourceType::FileRead),
        ],
        // Dangerous operations: System.cmd, Code.eval_string and raw Ecto SQL.
        sinks: vec![
            (Regex::new(r"System\.cmd\s*\(").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"Code\.eval_string\s*\(").unwrap(), TaintSinkType::CodeEval),
            (Regex::new(r"Ecto\.Adapters\.SQL\.query\s*\(").unwrap(), TaintSinkType::SqlQuery),
        ],
        // String-to-number conversions and Phoenix HTML escaping.
        sanitizers: vec![
            (Regex::new(r"String\.(to_integer|to_float)\s*\(").unwrap(), SanitizerType::Numeric),
            (Regex::new(r"Phoenix\.HTML\.html_escape\s*\(").unwrap(), SanitizerType::Html),
        ],
    };
}
873
// OCaml pattern table, selected by `get_patterns` for `Language::Ocaml`.
// Unlike the other tables, each pattern requires a whitespace character after
// the function name rather than an opening parenthesis.
lazy_static! {
    static ref OCAML_PATTERNS: LanguagePatterns = LanguagePatterns {
        // Entry points: read_line, Sys.getenv, input_line and In_channel reads.
        sources: vec![
            (Regex::new(r"\bread_line\s").unwrap(), TaintSourceType::UserInput),
            (Regex::new(r"Sys\.getenv\s").unwrap(), TaintSourceType::EnvVar),
            (Regex::new(r"\binput_line\s").unwrap(), TaintSourceType::UserInput),
            (Regex::new(r"In_channel\.(read_all|input_all)\s").unwrap(), TaintSourceType::FileRead),
        ],
        // Dangerous operations: Sys.command, Unix.execvp and Sqlite3.exec.
        sinks: vec![
            (Regex::new(r"Sys\.command\s").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"Unix\.execvp\s").unwrap(), TaintSinkType::ShellExec),
            (Regex::new(r"Sqlite3\.exec\s").unwrap(), TaintSinkType::SqlQuery),
        ],
        // String-to-number conversions.
        sanitizers: vec![
            (Regex::new(r"\b(int_of_string|float_of_string)\s").unwrap(), SanitizerType::Numeric),
        ],
    };
}
901
902pub fn get_patterns(language: Language) -> &'static LanguagePatterns {
907 match language {
908 Language::Python => &PYTHON_PATTERNS,
909 Language::TypeScript | Language::JavaScript => &TYPESCRIPT_PATTERNS,
910 Language::Go => &GO_PATTERNS,
911 Language::Java => &JAVA_PATTERNS,
912 Language::Rust => &RUST_PATTERNS,
913 Language::C => &C_PATTERNS,
914 Language::Cpp => &CPP_PATTERNS,
915 Language::Ruby => &RUBY_PATTERNS,
916 Language::Kotlin => &KOTLIN_PATTERNS,
917 Language::Swift => &SWIFT_PATTERNS,
918 Language::CSharp => &CSHARP_PATTERNS,
919 Language::Scala => &SCALA_PATTERNS,
920 Language::Php => &PHP_PATTERNS,
921 Language::Lua | Language::Luau => &LUA_PATTERNS,
922 Language::Elixir => &ELIXIR_PATTERNS,
923 Language::Ocaml => &OCAML_PATTERNS,
924 }
925}
926
927pub fn detect_sources(statement: &str, line: u32, language: Language) -> Vec<TaintSource> {
944 let mut sources = Vec::new();
945 let patterns = get_patterns(language);
946
947 for (pattern, source_type) in patterns.sources.iter() {
948 if pattern.is_match(statement) {
949 if let Some(var) = extract_assigned_var(statement) {
951 sources.push(TaintSource {
952 var,
953 line,
954 source_type: *source_type,
955 statement: Some(statement.to_string()),
956 });
957 } else {
958 if let Some(var) = extract_call_arg(statement, pattern) {
961 sources.push(TaintSource {
962 var,
963 line,
964 source_type: *source_type,
965 statement: Some(statement.to_string()),
966 });
967 } else {
968 let var = extract_source_var_from_statement(statement);
972 if let Some(var) = var {
973 sources.push(TaintSource {
974 var,
975 line,
976 source_type: *source_type,
977 statement: Some(statement.to_string()),
978 });
979 }
980 }
981 }
982 }
983 }
984
985 sources
986}
987
988fn extract_source_var_from_statement(statement: &str) -> Option<String> {
996 if let Some(pos) = statement.find(">>") {
998 let after = statement[pos + 2..].trim();
999 let var = after.split_whitespace().next().unwrap_or("");
1000 let var = var.trim_end_matches(|c: char| !c.is_alphanumeric() && c != '_');
1001 if is_valid_identifier(var) {
1002 return Some(var.to_string());
1003 }
1004 }
1005
1006 if let Some(pos) = statement.find('&') {
1008 let after = &statement[pos + 1..];
1009 let var = after
1010 .split(|c: char| !c.is_alphanumeric() && c != '_')
1011 .next()
1012 .unwrap_or("");
1013 if is_valid_identifier(var) {
1014 return Some(var.to_string());
1015 }
1016 }
1017
1018 let tokens: Vec<&str> = statement.split_whitespace().collect();
1021 if tokens.len() >= 2 {
1022 for tok in tokens.iter().skip(1) {
1024 let var = tok.split('(').next().unwrap_or(tok);
1026 let var = var.trim_end_matches(|c: char| !c.is_alphanumeric() && c != '_');
1027 if is_valid_identifier(var) && var.len() > 1 {
1028 return Some(var.to_string());
1029 }
1030 }
1031 }
1032
1033 None
1034}
1035
1036pub fn detect_sinks(statement: &str, line: u32, language: Language) -> Vec<TaintSink> {
1053 let mut sinks = Vec::new();
1054 let patterns = get_patterns(language);
1055 for (pattern, sink_type) in patterns.sinks.iter() {
1056 if pattern.is_match(statement) {
1057 if let Some(var) = extract_call_arg(statement, pattern) {
1059 sinks.push(TaintSink {
1060 var,
1061 line,
1062 sink_type: *sink_type,
1063 tainted: false,
1064 statement: Some(statement.to_string()),
1065 });
1066 } else {
1067 if let Some(var) = extract_sink_var_from_statement(statement, pattern) {
1070 sinks.push(TaintSink {
1071 var,
1072 line,
1073 sink_type: *sink_type,
1074 tainted: false,
1075 statement: Some(statement.to_string()),
1076 });
1077 } else {
1078 let interp_vars = extract_interpolated_vars(statement);
1082 for var in interp_vars {
1083 sinks.push(TaintSink {
1084 var,
1085 line,
1086 sink_type: *sink_type,
1087 tainted: false,
1088 statement: Some(statement.to_string()),
1089 });
1090 }
1091 }
1092 }
1093 }
1094}
1095sinks
1096}
1097
1098fn extract_sink_var_from_statement(statement: &str, pattern: &Regex) -> Option<String> {
1107 if let Some(m) = pattern.find(statement) {
1108 let after = &statement[m.end()..];
1109 let after = after.trim();
1110
1111 if after.is_empty() || !after.starts_with('(') {
1113 if let Some(eq_pos) = statement.rfind('=') {
1115 let before_eq = if eq_pos > 0 {
1117 statement.as_bytes()[eq_pos - 1]
1118 } else {
1119 b' '
1120 };
1121 let after_eq = if eq_pos + 1 < statement.len() {
1122 statement.as_bytes()[eq_pos + 1]
1123 } else {
1124 b' '
1125 };
1126 if before_eq != b'='
1127 && before_eq != b'!'
1128 && before_eq != b'<'
1129 && before_eq != b'>'
1130 && after_eq != b'='
1131 {
1132 let rhs = statement[eq_pos + 1..].trim();
1133 let var = rhs
1134 .split(|c: char| !c.is_alphanumeric() && c != '_')
1135 .next()
1136 .unwrap_or("");
1137 if is_valid_identifier(var) {
1138 return Some(var.to_string());
1139 }
1140 }
1141 }
1142 }
1143
1144 let search_area = &statement[m.start()..];
1147 if let Some(open) = search_area.find('(') {
1148 let rest = &search_area[open + 1..];
1149 let end = rest.find([',', ')']).unwrap_or(rest.len());
1150 let arg = rest[..end].trim();
1151 if !arg.starts_with('"') && !arg.starts_with('\'') && !arg.is_empty() {
1152 let var_name = arg.split('.').next().unwrap_or(arg);
1153 let var_name = var_name.trim_start_matches('$');
1154 if is_valid_identifier(var_name) {
1155 return Some(var_name.to_string());
1156 }
1157 }
1158 }
1159
1160 if !after.is_empty() && !after.starts_with('(') {
1164 let token = after
1166 .split(|c: char| c.is_whitespace() || c == ';')
1167 .next()
1168 .unwrap_or("");
1169 let token = token.trim_end_matches(|c: char| !c.is_alphanumeric() && c != '_');
1170 if is_valid_identifier(token) {
1171 return Some(token.to_string());
1172 }
1173 }
1174
1175 if statement.contains(';') {
1178 for part in statement.split(';') {
1180 let part = part.trim();
1181 if pattern.is_match(part) {
1183 continue;
1184 }
1185 let var = part
1187 .split(|c: char| !c.is_alphanumeric() && c != '_')
1188 .find(|t| is_valid_identifier(t));
1189 if let Some(var) = var {
1190 return Some(var.to_string());
1191 }
1192 }
1193 }
1194 }
1195
1196 None
1197}
1198
1199pub fn detect_sanitizer(statement: &str, language: Language) -> Option<SanitizerType> {
1213 let patterns = get_patterns(language);
1214 for (pattern, sanitizer_type) in patterns.sanitizers.iter() {
1215 if pattern.is_match(statement) {
1216 return Some(*sanitizer_type);
1217 }
1218 }
1219 None
1220}
1221
1222pub fn is_sanitizer(statement: &str, language: Language) -> bool {
1233 detect_sanitizer(statement, language).is_some()
1234}
1235
1236pub fn find_sanitizers_in_statement(
1248 statement: &str,
1249 _line: u32,
1250 language: Language,
1251) -> Vec<(String, SanitizerType)> {
1252 let mut result = Vec::new();
1253 let patterns = get_patterns(language);
1254
1255 for (pattern, sanitizer_type) in patterns.sanitizers.iter() {
1256 if pattern.is_match(statement) {
1257 if let Some(var) = extract_assigned_var(statement) {
1259 result.push((var, *sanitizer_type));
1260 }
1261 }
1262 }
1263
1264 result
1265}
1266
1267fn extract_assigned_var(statement: &str) -> Option<String> {
1282 let trimmed = statement.trim();
1283
1284 if let Some(pos) = trimmed.find(":=") {
1286 let before = &trimmed[..pos];
1287 let var = before.trim().trim_start_matches('(').trim();
1288 if is_valid_identifier(var) {
1289 return Some(var.to_string());
1290 }
1291 if let Some(first) = var.split(',').next() {
1293 let first = first.trim();
1294 if is_valid_identifier(first) {
1295 return Some(first.to_string());
1296 }
1297 }
1298 }
1299
1300 if let Some(pos) = trimmed.find('=') {
1302 if pos > 0 && trimmed.chars().nth(pos.saturating_sub(1)) == Some('=') {
1304 return None;
1305 }
1306 if pos + 1 < trimmed.len() && trimmed.chars().nth(pos + 1) == Some('=') {
1307 return None;
1308 }
1309 if pos > 0 {
1311 let prev_char = trimmed.chars().nth(pos.saturating_sub(1));
1312 if prev_char == Some('!') || prev_char == Some('<') || prev_char == Some('>') {
1313 return None;
1314 }
1315 }
1316
1317 let before = &trimmed[..pos];
1318 let var_part = if let Some(colon_pos) = before.find(':') {
1320 &before[..colon_pos]
1321 } else {
1322 before
1323 };
1324 let var = var_part.trim();
1325 if is_valid_identifier(var) {
1326 return Some(var.to_string());
1327 }
1328
1329 let tokens: Vec<&str> = var.split_whitespace().collect();
1341 if tokens.len() >= 2 {
1342 let last = tokens[tokens.len() - 1];
1344 let clean = last.trim_start_matches('*').trim_start_matches('&');
1346 let check = clean.trim_start_matches('$');
1348 if !check.is_empty() && is_valid_identifier(check) {
1349 return Some(clean.to_string());
1350 }
1351 }
1352
1353 if var.contains('{') || var.contains('(') || var.contains('[') {
1356 let cleaned = var.replace(['{', '}', '(', ')', '[', ']', ':'], " ");
1358 let idents: Vec<&str> = cleaned
1359 .split_whitespace()
1360 .filter(|t| is_valid_identifier(t) && *t != "ok" && *t != "err")
1361 .collect();
1362 if let Some(last_ident) = idents.last() {
1363 return Some(last_ident.to_string());
1364 }
1365 }
1366
1367 if let Some(name) = var.strip_prefix('$') {
1369 if is_valid_identifier(name) {
1370 return Some(var.to_string());
1371 }
1372 }
1373 }
1374
1375 None
1380}
1381
1382fn extract_call_arg(statement: &str, pattern: &Regex) -> Option<String> {
1394 if let Some(m) = pattern.find(statement) {
1396 let after_match = &statement[m.end()..];
1397 let rest = after_match.strip_prefix('(').unwrap_or(after_match);
1400 let mut remaining = rest;
1402 loop {
1403 let end = remaining
1405 .find([',', ')'])
1406 .unwrap_or(remaining.len());
1407 let arg = remaining[..end].trim();
1408 if !arg.is_empty()
1410 && !arg.starts_with('"')
1411 && !arg.starts_with('\'')
1412 && !arg.starts_with("f\"")
1413 && !arg.starts_with("f'")
1414 && !arg.starts_with("r\"")
1415 && !arg.starts_with("r'")
1416 {
1417 let var_name = arg.split('.').next().unwrap_or(arg);
1419 let check_name = var_name.trim_start_matches('$');
1421 if is_valid_identifier(check_name) {
1422 return Some(var_name.to_string());
1423 }
1424 }
1425 if arg.contains('+') {
1427 for part in arg.split('+') {
1428 let part = part.trim();
1429 if !part.is_empty()
1430 && !part.starts_with('"')
1431 && !part.starts_with('\'')
1432 && !part.starts_with("f\"")
1433 && !part.starts_with("f'")
1434 {
1435 let var_name = part.split('.').next().unwrap_or(part);
1436 let check_name = var_name.trim_start_matches('$');
1437 if is_valid_identifier(check_name) {
1438 return Some(var_name.to_string());
1439 }
1440 }
1441 }
1442 }
1443 if end >= remaining.len() {
1445 break;
1446}
1447let next_char = remaining.as_bytes()[end];
1448if next_char == b')' {
1449 break;
1450}
1451remaining = &remaining[end + 1..];
1453}
1454}
1455None
1456}
1457
1458fn extract_interpolated_vars(statement: &str) -> Vec<String> {
1470 let mut vars = Vec::new();
1471
1472 let _chars = statement.chars().peekable();
1475 let mut i = 0;
1476 let bytes = statement.as_bytes();
1477
1478 while i < bytes.len() {
1479 let is_interp = match bytes[i] {
1481 b'{' => {
1482 i + 1 < bytes.len() && bytes[i + 1] != b'{'
1484 }
1485 b'$' | b'#' => {
1486 i + 1 < bytes.len() && bytes[i + 1] == b'{'
1488 }
1489 _ => false,
1490 };
1491
1492 if is_interp {
1493 let brace_start = if bytes[i] == b'{' { i } else { i + 1 };
1495 if brace_start + 1 < bytes.len() {
1496 if let Some(close) = statement[brace_start + 1..].find('}') {
1498 let inner = &statement[brace_start + 1..brace_start + 1 + close];
1499 let inner = inner.trim();
1500 let var_name = inner
1502 .split(|c: char| !c.is_alphanumeric() && c != '_')
1503 .next()
1504 .unwrap_or("");
1505 if is_valid_identifier(var_name) {
1506 vars.push(var_name.to_string());
1507 }
1508 i = brace_start + 1 + close + 1;
1509 continue;
1510 }
1511 }
1512 }
1513
1514 if i + 8 < bytes.len() && &statement[i..i + 8] == ".format(" {
1516 let args_start = i + 8;
1517 if let Some(close) = statement[args_start..].find(')') {
1518 let args_str = &statement[args_start..args_start + close];
1519 for arg in args_str.split(',') {
1520 let arg = arg.trim();
1521 let val = if let Some(eq_pos) = arg.find('=') {
1523 arg[eq_pos + 1..].trim()
1524 } else {
1525 arg
1526 };
1527 let var_name = val
1528 .split(|c: char| !c.is_alphanumeric() && c != '_')
1529 .next()
1530 .unwrap_or("");
1531 if is_valid_identifier(var_name) {
1532 vars.push(var_name.to_string());
1533 }
1534 }
1535 i = args_start + close + 1;
1536 continue;
1537 }
1538 }
1539
1540 if bytes[i] == b'%' && i > 0 {
1542 let before = statement[..i].trim_end();
1543 let after = statement[i + 1..].trim_start();
1544 if (before.ends_with('"') || before.ends_with('\'')) && !after.starts_with('%') {
1545 let args_str = if after.starts_with('(') {
1548 if let Some(close) = after.find(')') {
1549 &after[1..close]
1550 } else {
1551 ""
1552 }
1553 } else {
1554 after.split(|c: char| c.is_whitespace() || c == ')' || c == ',')
1556 .next()
1557 .unwrap_or("")
1558 };
1559 for arg in args_str.split(',') {
1560 let arg = arg.trim();
1561 let var_name = arg
1562 .split(|c: char| !c.is_alphanumeric() && c != '_')
1563 .next()
1564 .unwrap_or("");
1565 if is_valid_identifier(var_name) {
1566 vars.push(var_name.to_string());
1567 }
1568 }
1569 }
1570 }
1571
1572 i += 1;
1573 }
1574
1575 vars.sort();
1577 vars.dedup();
1578 vars
1579}
1580
/// Returns `true` when `s` is a non-empty identifier: the first character is
/// alphabetic or `_`, and every remaining character is alphanumeric or `_`.
///
/// Character classes are the Unicode ones of `char::is_alphabetic` /
/// `char::is_alphanumeric`, so non-ASCII letters are accepted.
fn is_valid_identifier(s: &str) -> bool {
    let mut rest = s.chars();
    match rest.next() {
        // The head is already known to be a valid identifier character, so only
        // the tail still needs checking.
        Some(head) if head.is_alphabetic() || head == '_' => {
            rest.all(|c| c.is_alphanumeric() || c == '_')
        }
        _ => false,
    }
}
1593
/// Returns `true` when `ident` occurs in `text` as a whole word.
///
/// An occurrence counts only if the byte before it and the byte after it are
/// not ASCII alphanumerics or `_` (the start and end of `text` also count as
/// boundaries). Overlapping occurrences are all examined. An empty `ident`, or
/// one longer than `text`, never matches.
///
/// Note that the boundary test is ASCII-only: a non-ASCII letter adjacent to
/// the occurrence is treated as a boundary.
fn identifier_in_text(text: &str, ident: &str) -> bool {
    let bytes = text.as_bytes();
    let ident_len = ident.len();
    if ident_len == 0 || ident_len > bytes.len() {
        return false;
    }

    // After a rejected occurrence, resume one *character* further, not one
    // byte: every occurrence starts with `ident`'s first character, so a
    // one-byte step could land inside a multi-byte character and make the
    // `text[pos..]` slice below panic.
    let step = ident.chars().next().map_or(1, char::len_utf8);
    let is_word_byte = |b: u8| b.is_ascii_alphanumeric() || b == b'_';

    let mut pos = 0;
    while pos + ident_len <= bytes.len() {
        let abs = match text[pos..].find(ident) {
            Some(offset) => pos + offset,
            None => break,
        };
        let after_pos = abs + ident_len;
        let before_ok = abs == 0 || !is_word_byte(bytes[abs - 1]);
        let after_ok = after_pos >= bytes.len() || !is_word_byte(bytes[after_pos]);
        if before_ok && after_ok {
            return true;
        }
        pos = abs + step;
    }
    false
}
1628
1629#[allow(dead_code)]
1641pub fn is_constant_string(statement: &str) -> bool {
1642 lazy_static! {
1644 static ref CONST_STRING: Regex = Regex::new(r#"^\s*\w+\s*=\s*["'][^"']*["']\s*$"#).unwrap();
1645 }
1646 CONST_STRING.is_match(statement)
1647}
1648
1649#[allow(dead_code)]
1662pub fn is_orm_safe_pattern(statement: &str) -> bool {
1663 lazy_static! {
1664 static ref ORM_SAFE: Regex =
1666 Regex::new(r"(\.filter\s*\(|\.where\s*\(|\.filter_by\s*\()").unwrap();
1667 }
1668 ORM_SAFE.is_match(statement)
1669}
1670
1671pub use detect_sinks as find_sinks_in_statement;
1673pub use detect_sources as find_sources_in_statement;
1674
1675use super::ast_utils::{
1689 call_node_kinds, extract_call_name, find_parent_assignment_var, is_in_comment, is_in_string,
1690 node_text, walk_descendants,
1691};
1692
/// One group of taint-source patterns that share a single [`TaintSourceType`].
struct AstSourcePattern {
    /// Plain call names, e.g. `input` or `getenv` in the per-language tables.
    call_names: &'static [&'static str],
    /// Qualified or member-style text fragments, e.g. `request.args` or `.read(`.
    /// NOTE(review): how these are compared against AST nodes is decided by the
    /// matcher, which is outside this part of the file.
    member_patterns: &'static [&'static str],
    /// Source category associated with every pattern in this group.
    source_type: TaintSourceType,
}
1703
/// One group of taint-sink patterns that share a single [`TaintSinkType`].
struct AstSinkPattern {
    /// Plain call names, e.g. `eval` or `system` in the per-language tables.
    call_names: &'static [&'static str],
    /// Qualified or member-style text fragments, e.g. `subprocess.run` or `.execute(`.
    /// NOTE(review): matching semantics live in the matcher, not visible here.
    member_patterns: &'static [&'static str],
    /// Sink category associated with every pattern in this group.
    sink_type: TaintSinkType,
}
1710
/// One group of sanitizer patterns that share a single [`SanitizerType`].
struct AstSanitizerPattern {
    /// Plain call names, e.g. `int` or `parseInt` in the per-language tables.
    call_names: &'static [&'static str],
    /// Qualified or member-style text fragments, e.g. `shlex.quote` or `.to_i`.
    /// NOTE(review): matching semantics live in the matcher, not visible here.
    member_patterns: &'static [&'static str],
    /// Sanitizer category associated with every pattern in this group.
    sanitizer_type: SanitizerType,
}
1717
/// The three pattern tables (sources, sinks, sanitizers) for one language, as
/// bundled by `get_ast_patterns`.
struct AstLanguagePatterns {
    /// Patterns that introduce tainted data.
    sources: &'static [AstSourcePattern],
    /// Patterns for operations listed as taint sinks.
    sinks: &'static [AstSinkPattern],
    /// Patterns for calls listed as sanitizers.
    sanitizers: &'static [AstSanitizerPattern],
}
1724
1725static PYTHON_AST_SOURCES: &[AstSourcePattern] = &[
1730 AstSourcePattern {
1731 call_names: &["input"],
1732 member_patterns: &[],
1733 source_type: TaintSourceType::UserInput,
1734 },
1735 AstSourcePattern {
1736 call_names: &[],
1737 member_patterns: &[
1738 "request.args",
1739 "request.form",
1740 "request.values",
1741 "request.cookies",
1742 "request.headers",
1743 ],
1744 source_type: TaintSourceType::HttpParam,
1745 },
1746 AstSourcePattern {
1747 call_names: &[],
1748 member_patterns: &["request.json", "request.data"],
1749 source_type: TaintSourceType::HttpParam,
1750 },
1751 AstSourcePattern {
1752 call_names: &[],
1753 member_patterns: &["request.get_json"],
1754 source_type: TaintSourceType::HttpBody,
1755 },
1756 AstSourcePattern {
1757 call_names: &[],
1758 member_patterns: &["sys.stdin"],
1759 source_type: TaintSourceType::Stdin,
1760 },
1761 AstSourcePattern {
1762 call_names: &[],
1763 member_patterns: &["os.environ", "os.getenv"],
1764 source_type: TaintSourceType::EnvVar,
1765 },
1766 AstSourcePattern {
1767 call_names: &[],
1768 member_patterns: &[".read(", ".readlines(", ".readline("],
1769 source_type: TaintSourceType::FileRead,
1770 },
1771];
1772
/// Python taint-sink patterns: `.execute`/`.executemany`, `eval`/`exec`/`compile`,
/// `subprocess` and `os` process launchers, and `.write(`.
static PYTHON_AST_SINKS: &[AstSinkPattern] = &[
    AstSinkPattern {
        call_names: &[],
        member_patterns: &[".execute(", ".executemany("],
        sink_type: TaintSinkType::SqlQuery,
    },
    AstSinkPattern {
        call_names: &["eval"],
        member_patterns: &[],
        sink_type: TaintSinkType::CodeEval,
    },
    AstSinkPattern {
        call_names: &["exec"],
        member_patterns: &[],
        sink_type: TaintSinkType::CodeExec,
    },
    AstSinkPattern {
        call_names: &["compile"],
        member_patterns: &[],
        sink_type: TaintSinkType::CodeCompile,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &[
            "subprocess.run",
            "subprocess.call",
            "subprocess.Popen",
            "subprocess.check_output",
        ],
        sink_type: TaintSinkType::ShellExec,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["os.system", "os.popen"],
        sink_type: TaintSinkType::ShellExec,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &[".write("],
        sink_type: TaintSinkType::FileWrite,
    },
];
1815
/// Python sanitizer patterns: numeric/boolean conversions, shell quoting and
/// HTML escaping helpers.
static PYTHON_AST_SANITIZERS: &[AstSanitizerPattern] = &[
    AstSanitizerPattern {
        call_names: &["int", "float", "bool"],
        member_patterns: &[],
        sanitizer_type: SanitizerType::Numeric,
    },
    AstSanitizerPattern {
        call_names: &[],
        member_patterns: &["shlex.quote", "pipes.quote"],
        sanitizer_type: SanitizerType::Shell,
    },
    AstSanitizerPattern {
        call_names: &[],
        member_patterns: &["html.escape", "markupsafe.escape", "cgi.escape"],
        sanitizer_type: SanitizerType::Html,
    },
];
1833
/// Taint-source patterns shared by TypeScript and JavaScript (see
/// `get_ast_patterns`): Express-style `req.*` accessors, `process.env`,
/// `process.stdin`, `readline` and file reads.
static TYPESCRIPT_AST_SOURCES: &[AstSourcePattern] = &[
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["req.body"],
        source_type: TaintSourceType::HttpBody,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["req.params", "req.query", "req.cookies", "req.headers"],
        source_type: TaintSourceType::HttpParam,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["process.env"],
        source_type: TaintSourceType::EnvVar,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["process.stdin"],
        source_type: TaintSourceType::Stdin,
    },
    AstSourcePattern {
        call_names: &["readline"],
        member_patterns: &[],
        source_type: TaintSourceType::UserInput,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &[".read(", ".readFile("],
        source_type: TaintSourceType::FileRead,
    },
];
1866
/// Taint-sink patterns shared by TypeScript and JavaScript: dynamic code
/// evaluation, `child_process` launchers, DOM writes and SQL calls.
static TYPESCRIPT_AST_SINKS: &[AstSinkPattern] = &[
    AstSinkPattern {
        call_names: &["eval"],
        member_patterns: &[],
        sink_type: TaintSinkType::CodeEval,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["new Function"],
        sink_type: TaintSinkType::CodeEval,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &[
            "child_process.exec",
            "child_process.spawn",
            "child_process.execSync",
            "child_process.execFile",
        ],
        sink_type: TaintSinkType::ShellExec,
    },
    AstSinkPattern {
        call_names: &["execSync"],
        member_patterns: &[],
        sink_type: TaintSinkType::ShellExec,
    },
    // NOTE(review): DOM writes are filed under `FileWrite`; `TaintSinkType` has
    // no dedicated HTML/XSS variant.
    AstSinkPattern {
        call_names: &[],
        member_patterns: &[".innerHTML"],
        sink_type: TaintSinkType::FileWrite,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["document.write"],
        sink_type: TaintSinkType::FileWrite,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &[".query(", ".execute("],
        sink_type: TaintSinkType::SqlQuery,
    },
];
1909
/// Sanitizer patterns shared by TypeScript and JavaScript: numeric parsing plus
/// URI-component encoding and `DOMPurify.sanitize`.
static TYPESCRIPT_AST_SANITIZERS: &[AstSanitizerPattern] = &[
    AstSanitizerPattern {
        call_names: &["parseInt", "Number", "parseFloat"],
        member_patterns: &[],
        sanitizer_type: SanitizerType::Numeric,
    },
    AstSanitizerPattern {
        call_names: &["encodeURIComponent"],
        member_patterns: &["DOMPurify.sanitize"],
        sanitizer_type: SanitizerType::Html,
    },
];
1922
/// Go taint-source patterns: console readers, `net/http`-style request
/// accessors (the receiver is spelled `r` in the patterns), environment, stdin
/// and file reads.
static GO_AST_SOURCES: &[AstSourcePattern] = &[
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["fmt.Scan", "bufio.NewReader", "bufio.NewScanner"],
        source_type: TaintSourceType::UserInput,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["r.FormValue", "r.PostFormValue", "r.URL.Query", ".Query()"],
        source_type: TaintSourceType::HttpParam,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["r.Body", ".ReadAll(r.Body)"],
        source_type: TaintSourceType::HttpBody,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["os.Getenv"],
        source_type: TaintSourceType::EnvVar,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["os.Stdin"],
        source_type: TaintSourceType::Stdin,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["os.Open", "ioutil.ReadFile"],
        source_type: TaintSourceType::FileRead,
    },
];
1955
/// Go taint-sink patterns: `exec.Command`, `db.*` query calls, and
/// `template.HTML` / `fmt.Fprintf` output.
static GO_AST_SINKS: &[AstSinkPattern] = &[
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["exec.Command"],
        sink_type: TaintSinkType::ShellExec,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["db.Exec", "db.Query", "db.QueryRow"],
        sink_type: TaintSinkType::SqlQuery,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["template.HTML", "fmt.Fprintf"],
        sink_type: TaintSinkType::FileWrite,
    },
];
1973
/// Go sanitizer patterns: `strconv` numeric parsing and HTML/URL escaping.
static GO_AST_SANITIZERS: &[AstSanitizerPattern] = &[
    AstSanitizerPattern {
        call_names: &[],
        member_patterns: &["strconv.Atoi", "strconv.ParseInt", "strconv.ParseFloat"],
        sanitizer_type: SanitizerType::Numeric,
    },
    AstSanitizerPattern {
        call_names: &[],
        member_patterns: &["html.EscapeString", "url.QueryEscape"],
        sanitizer_type: SanitizerType::Html,
    },
];
1986
/// Java taint-source patterns: `Scanner(System.in)`, buffered readers, servlet
/// request parameters, environment variables and file readers.
static JAVA_AST_SOURCES: &[AstSourcePattern] = &[
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["new Scanner(System.in)"],
        source_type: TaintSourceType::Stdin,
    },
    AstSourcePattern {
        call_names: &["readLine"],
        member_patterns: &["new BufferedReader"],
        source_type: TaintSourceType::UserInput,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["request.getParameter", "getQueryString"],
        source_type: TaintSourceType::HttpParam,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["System.getenv"],
        source_type: TaintSourceType::EnvVar,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["new FileReader", "Files.readAllLines"],
        source_type: TaintSourceType::FileRead,
    },
];
2014
/// Java taint-sink patterns: process execution, JDBC-style `execute*` calls and
/// reflective class loading.
static JAVA_AST_SINKS: &[AstSinkPattern] = &[
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["Runtime.getRuntime().exec", "ProcessBuilder"],
        sink_type: TaintSinkType::ShellExec,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &[".execute(", ".executeQuery(", ".executeUpdate("],
        sink_type: TaintSinkType::SqlQuery,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["Class.forName"],
        sink_type: TaintSinkType::CodeEval,
    },
];
2032
/// Java sanitizer patterns: boxed-type numeric parsing and HTML encoders.
static JAVA_AST_SANITIZERS: &[AstSanitizerPattern] = &[
    AstSanitizerPattern {
        call_names: &[],
        member_patterns: &["Integer.parseInt", "Long.parseLong", "Double.parseDouble"],
        sanitizer_type: SanitizerType::Numeric,
    },
    AstSanitizerPattern {
        call_names: &[],
        member_patterns: &["ESAPI.encoder", "StringEscapeUtils.escapeHtml"],
        sanitizer_type: SanitizerType::Html,
    },
];
2045
/// Rust taint-source patterns: stdin, environment variables, process arguments
/// and file reads. Both short (`env::var`) and fully qualified
/// (`std::env::var`) spellings are listed.
static RUST_AST_SOURCES: &[AstSourcePattern] = &[
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["io::stdin", "std::io::stdin"],
        source_type: TaintSourceType::Stdin,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["env::var", "std::env::var"],
        source_type: TaintSourceType::EnvVar,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["env::args", "std::env::args"],
        source_type: TaintSourceType::UserInput,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &[
            "fs::read_to_string",
            "std::fs::read_to_string",
            "File::open",
        ],
        source_type: TaintSourceType::FileRead,
    },
];
2072
/// Rust taint-sink patterns: `Command` construction, the `unsafe` keyword and
/// raw pointer reads/writes.
static RUST_AST_SINKS: &[AstSinkPattern] = &[
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["Command::new", "std::process::Command"],
        sink_type: TaintSinkType::ShellExec,
    },
    // NOTE(review): `unsafe` is a keyword rather than a call; it is filed under
    // `CodeEval` here — confirm this is the intended category.
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["unsafe"],
        sink_type: TaintSinkType::CodeEval,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["std::ptr::write", "std::ptr::read"],
        sink_type: TaintSinkType::FileWrite,
    },
];
2090
/// Rust sanitizer patterns: turbofish `.parse::<T>` for the listed integer and
/// float types.
static RUST_AST_SANITIZERS: &[AstSanitizerPattern] = &[AstSanitizerPattern {
    call_names: &[],
    member_patterns: &[
        ".parse::<i32>",
        ".parse::<i64>",
        ".parse::<u32>",
        ".parse::<u64>",
        ".parse::<f32>",
        ".parse::<f64>",
        ".parse::<usize>",
        ".parse::<isize>",
    ],
    sanitizer_type: SanitizerType::Numeric,
}];
2105
/// C taint-source patterns: stdio input functions, `getenv`, file reads and
/// socket receives. All entries are plain call names.
static C_AST_SOURCES: &[AstSourcePattern] = &[
    AstSourcePattern {
        call_names: &["scanf", "fscanf", "sscanf", "fgets", "gets", "getchar"],
        member_patterns: &[],
        source_type: TaintSourceType::UserInput,
    },
    AstSourcePattern {
        call_names: &["getenv"],
        member_patterns: &[],
        source_type: TaintSourceType::EnvVar,
    },
    AstSourcePattern {
        call_names: &["fread", "fopen"],
        member_patterns: &[],
        source_type: TaintSourceType::FileRead,
    },
    AstSourcePattern {
        call_names: &["recv", "recvfrom"],
        member_patterns: &[],
        source_type: TaintSourceType::UserInput,
    },
];
2128
/// C taint-sink patterns: process launchers, unbounded formatting and string
/// copy functions.
static C_AST_SINKS: &[AstSinkPattern] = &[
    AstSinkPattern {
        call_names: &["system", "popen", "execl", "execv", "execvp"],
        member_patterns: &[],
        sink_type: TaintSinkType::ShellExec,
    },
    // NOTE(review): the formatting and copy functions below are mapped onto
    // `ShellExec` / `FileWrite`; `TaintSinkType` has no buffer-overflow variant.
    AstSinkPattern {
        call_names: &["sprintf", "vsprintf"],
        member_patterns: &[],
        sink_type: TaintSinkType::ShellExec,
    },
    AstSinkPattern {
        call_names: &["strcpy", "strcat", "strncpy"],
        member_patterns: &[],
        sink_type: TaintSinkType::FileWrite,
    },
];
2146
/// C sanitizer patterns: string-to-number conversions, plus `snprintf` listed
/// under the `Shell` category.
static C_AST_SANITIZERS: &[AstSanitizerPattern] = &[
    AstSanitizerPattern {
        call_names: &["atoi", "atol", "atof", "strtol", "strtoul", "strtod"],
        member_patterns: &[],
        sanitizer_type: SanitizerType::Numeric,
    },
    AstSanitizerPattern {
        call_names: &["snprintf"],
        member_patterns: &[],
        sanitizer_type: SanitizerType::Shell,
    },
];
2159
/// C++ taint-source patterns: `std::cin` / `getline`, `getenv` and file streams.
static CPP_AST_SOURCES: &[AstSourcePattern] = &[
    AstSourcePattern {
        call_names: &["getline"],
        member_patterns: &["std::cin", "std::getline"],
        source_type: TaintSourceType::UserInput,
    },
    AstSourcePattern {
        call_names: &["getenv"],
        member_patterns: &[],
        source_type: TaintSourceType::EnvVar,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["std::ifstream", "std::fstream"],
        source_type: TaintSourceType::FileRead,
    },
];
2177
/// C++ taint-sink patterns: `system` / `popen` (bare and `std::`-qualified) and
/// `sprintf`.
static CPP_AST_SINKS: &[AstSinkPattern] = &[
    AstSinkPattern {
        call_names: &["system", "popen"],
        member_patterns: &["std::system"],
        sink_type: TaintSinkType::ShellExec,
    },
    AstSinkPattern {
        call_names: &["sprintf"],
        member_patterns: &[],
        sink_type: TaintSinkType::ShellExec,
    },
];
2190
/// C++ sanitizer patterns: `std::sto*` conversions and `static_cast` to a
/// numeric type, all categorised as `Numeric`.
static CPP_AST_SANITIZERS: &[AstSanitizerPattern] = &[
    AstSanitizerPattern {
        call_names: &[],
        member_patterns: &[
            "std::stoi",
            "std::stol",
            "std::stoul",
            "std::stoll",
            "std::stof",
            "std::stod",
        ],
        sanitizer_type: SanitizerType::Numeric,
    },
    AstSanitizerPattern {
        call_names: &[],
        member_patterns: &[
            "static_cast<int>",
            "static_cast<long>",
            "static_cast<float>",
            "static_cast<double>",
        ],
        sanitizer_type: SanitizerType::Numeric,
    },
];
2215
/// Ruby taint-source patterns: `gets`, `STDIN.*`, `params[...]`, `ENV[...]` and
/// `File` reads.
static RUBY_AST_SOURCES: &[AstSourcePattern] = &[
    AstSourcePattern {
        call_names: &["gets"],
        member_patterns: &[],
        source_type: TaintSourceType::UserInput,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["STDIN.read", "STDIN.gets", "STDIN.readline"],
        source_type: TaintSourceType::Stdin,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["params["],
        source_type: TaintSourceType::HttpParam,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["ENV["],
        source_type: TaintSourceType::EnvVar,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["File.read", "File.open"],
        source_type: TaintSourceType::FileRead,
    },
];
2243
/// Ruby taint-sink patterns: `eval`, `system` / `exec`, `IO.popen` and dynamic
/// dispatch through `.send(`.
static RUBY_AST_SINKS: &[AstSinkPattern] = &[
    AstSinkPattern {
        call_names: &["eval"],
        member_patterns: &[],
        sink_type: TaintSinkType::CodeEval,
    },
    AstSinkPattern {
        call_names: &["system", "exec"],
        member_patterns: &[],
        sink_type: TaintSinkType::ShellExec,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["IO.popen"],
        sink_type: TaintSinkType::ShellExec,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &[".send("],
        sink_type: TaintSinkType::CodeEval,
    },
];
2266
/// Ruby sanitizer patterns: `.to_i` / `.to_f` conversions and HTML escaping
/// helpers.
static RUBY_AST_SANITIZERS: &[AstSanitizerPattern] = &[
    AstSanitizerPattern {
        call_names: &[],
        member_patterns: &[".to_i", ".to_f"],
        sanitizer_type: SanitizerType::Numeric,
    },
    AstSanitizerPattern {
        call_names: &[],
        member_patterns: &["CGI.escapeHTML", "Rack::Utils.escape_html"],
        sanitizer_type: SanitizerType::Html,
    },
];
2279
/// Kotlin taint-source patterns: `readLine` / `readln`, `System.getenv`,
/// `BufferedReader` and servlet request parameters.
static KOTLIN_AST_SOURCES: &[AstSourcePattern] = &[
    AstSourcePattern {
        call_names: &["readLine", "readln"],
        member_patterns: &[],
        source_type: TaintSourceType::UserInput,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["System.getenv"],
        source_type: TaintSourceType::EnvVar,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["BufferedReader"],
        source_type: TaintSourceType::UserInput,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["request.getParameter"],
        source_type: TaintSourceType::HttpParam,
    },
];
2302
/// Kotlin taint-sink patterns: process execution and JDBC-style statement
/// calls.
static KOTLIN_AST_SINKS: &[AstSinkPattern] = &[
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["Runtime.getRuntime().exec", "ProcessBuilder"],
        sink_type: TaintSinkType::ShellExec,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &[".execute(", ".executeQuery(", "prepareStatement"],
        sink_type: TaintSinkType::SqlQuery,
    },
];
2315
/// Kotlin sanitizer patterns: the `.toInt()` family of numeric conversions.
static KOTLIN_AST_SANITIZERS: &[AstSanitizerPattern] = &[AstSanitizerPattern {
    call_names: &[],
    member_patterns: &[".toInt()", ".toLong()", ".toDouble()", ".toFloat()"],
    sanitizer_type: SanitizerType::Numeric,
}];
2321
/// Swift taint-source patterns: `readLine`, process environment, and
/// `FileManager.default` / `URLSession` (both listed as `FileRead`).
static SWIFT_AST_SOURCES: &[AstSourcePattern] = &[
    AstSourcePattern {
        call_names: &["readLine"],
        member_patterns: &[],
        source_type: TaintSourceType::UserInput,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["ProcessInfo.processInfo.environment"],
        source_type: TaintSourceType::EnvVar,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["FileManager.default", "URLSession"],
        source_type: TaintSourceType::FileRead,
    },
];
2339
/// Swift taint-sink patterns: `Process()` / `NSTask` and `sqlite3_exec`.
static SWIFT_AST_SINKS: &[AstSinkPattern] = &[
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["Process()", "NSTask"],
        sink_type: TaintSinkType::ShellExec,
    },
    AstSinkPattern {
        call_names: &["sqlite3_exec"],
        member_patterns: &[],
        sink_type: TaintSinkType::SqlQuery,
    },
];
2352
/// Swift sanitizer patterns: numeric initialisers and percent-encoding.
static SWIFT_AST_SANITIZERS: &[AstSanitizerPattern] = &[
    AstSanitizerPattern {
        call_names: &["Int", "Double", "Float"],
        member_patterns: &[],
        sanitizer_type: SanitizerType::Numeric,
    },
    AstSanitizerPattern {
        call_names: &[],
        member_patterns: &["addingPercentEncoding"],
        sanitizer_type: SanitizerType::Html,
    },
];
2365
/// C# taint-source patterns: console input, ASP.NET-style `Request.*`
/// collections, environment variables and file readers.
static CSHARP_AST_SOURCES: &[AstSourcePattern] = &[
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["Console.ReadLine"],
        source_type: TaintSourceType::UserInput,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["Request.QueryString", "Request.Form"],
        source_type: TaintSourceType::HttpParam,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["Environment.GetEnvironmentVariable"],
        source_type: TaintSourceType::EnvVar,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &[
            "File.ReadAllText",
            "File.ReadAllLines",
            "File.OpenRead",
            "StreamReader",
        ],
        source_type: TaintSourceType::FileRead,
    },
];
2393
/// C# taint-sink patterns: `Process.Start`, ADO.NET-style command execution and
/// reflective instantiation.
static CSHARP_AST_SINKS: &[AstSinkPattern] = &[
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["Process.Start"],
        sink_type: TaintSinkType::ShellExec,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["SqlCommand", ".ExecuteNonQuery", ".ExecuteReader"],
        sink_type: TaintSinkType::SqlQuery,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["Activator.CreateInstance"],
        sink_type: TaintSinkType::CodeEval,
    },
];
2411
/// C# sanitizer patterns: numeric parsing/conversion and HTML encoding.
static CSHARP_AST_SANITIZERS: &[AstSanitizerPattern] = &[
    AstSanitizerPattern {
        call_names: &[],
        member_patterns: &["int.Parse", "Convert.ToInt32", "double.Parse"],
        sanitizer_type: SanitizerType::Numeric,
    },
    AstSanitizerPattern {
        call_names: &[],
        member_patterns: &["HttpUtility.HtmlEncode"],
        sanitizer_type: SanitizerType::Html,
    },
];
2424
/// Scala taint-source patterns: `StdIn` console input, `System.getenv` and
/// `Source.fromFile`.
static SCALA_AST_SOURCES: &[AstSourcePattern] = &[
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["StdIn.readLine", "scala.io.StdIn"],
        source_type: TaintSourceType::UserInput,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["System.getenv"],
        source_type: TaintSourceType::EnvVar,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["Source.fromFile"],
        source_type: TaintSourceType::FileRead,
    },
];
2442
/// Scala taint-sink patterns: process execution (`Runtime`, `sys.process`,
/// `Process(`) and JDBC-style `execute*` calls.
static SCALA_AST_SINKS: &[AstSinkPattern] = &[
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["Runtime.getRuntime.exec", "sys.process", "Process("],
        sink_type: TaintSinkType::ShellExec,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &[".execute(", ".executeQuery("],
        sink_type: TaintSinkType::SqlQuery,
    },
];
2455
/// Scala sanitizer patterns: `.toInt` / `.toLong` / `.toDouble` conversions and
/// `StringEscapeUtils.escapeHtml`.
static SCALA_AST_SANITIZERS: &[AstSanitizerPattern] = &[
    AstSanitizerPattern {
        call_names: &[],
        member_patterns: &[".toInt", ".toLong", ".toDouble"],
        sanitizer_type: SanitizerType::Numeric,
    },
    AstSanitizerPattern {
        call_names: &[],
        member_patterns: &["StringEscapeUtils.escapeHtml"],
        sanitizer_type: SanitizerType::Html,
    },
];
2468
/// PHP taint-source patterns: superglobal array accesses (`$_GET[`, `$_POST[`,
/// ...), `fgets`, `file_get_contents` and environment lookups.
static PHP_AST_SOURCES: &[AstSourcePattern] = &[
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["$_GET[", "$_REQUEST[", "$_COOKIE[", "$_SERVER["],
        source_type: TaintSourceType::HttpParam,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["$_POST["],
        source_type: TaintSourceType::HttpBody,
    },
    AstSourcePattern {
        call_names: &["fgets"],
        member_patterns: &[],
        source_type: TaintSourceType::UserInput,
    },
    AstSourcePattern {
        call_names: &["file_get_contents"],
        member_patterns: &[],
        source_type: TaintSourceType::FileRead,
    },
    AstSourcePattern {
        call_names: &["getenv"],
        member_patterns: &["$_ENV["],
        source_type: TaintSourceType::EnvVar,
    },
];
2496
/// PHP taint-sink patterns: `eval`, the shell-execution function family and
/// SQL query calls.
static PHP_AST_SINKS: &[AstSinkPattern] = &[
    AstSinkPattern {
        call_names: &["eval"],
        member_patterns: &[],
        sink_type: TaintSinkType::CodeEval,
    },
    AstSinkPattern {
        call_names: &[
            "exec",
            "system",
            "passthru",
            "shell_exec",
            "popen",
            "proc_open",
        ],
        member_patterns: &[],
        sink_type: TaintSinkType::ShellExec,
    },
    AstSinkPattern {
        call_names: &["mysqli_query"],
        member_patterns: &["->query("],
        sink_type: TaintSinkType::SqlQuery,
    },
];
2521
/// PHP sanitizer patterns: numeric conversion functions and casts, HTML
/// escaping, and `mysqli_real_escape_string`.
static PHP_AST_SANITIZERS: &[AstSanitizerPattern] = &[
    AstSanitizerPattern {
        call_names: &["intval", "floatval"],
        member_patterns: &["(int)", "(float)"],
        sanitizer_type: SanitizerType::Numeric,
    },
    AstSanitizerPattern {
        call_names: &["htmlspecialchars", "htmlentities"],
        member_patterns: &[],
        sanitizer_type: SanitizerType::Html,
    },
    // NOTE(review): SQL escaping is filed under `Shell`; `SanitizerType` has no
    // SQL variant.
    AstSanitizerPattern {
        call_names: &["mysqli_real_escape_string"],
        member_patterns: &[],
        sanitizer_type: SanitizerType::Shell,
    },
];
2539
/// Lua taint-source patterns: `io.read`, `os.getenv` and `io.open`.
static LUA_AST_SOURCES: &[AstSourcePattern] = &[
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["io.read"],
        source_type: TaintSourceType::UserInput,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["os.getenv"],
        source_type: TaintSourceType::EnvVar,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["io.open"],
        source_type: TaintSourceType::FileRead,
    },
];
2557
/// Lua taint-sink patterns: `os.execute`, `io.popen` and the chunk-loading
/// functions (`loadstring`, `load`, `dofile`, `loadfile`).
static LUA_AST_SINKS: &[AstSinkPattern] = &[
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["os.execute"],
        sink_type: TaintSinkType::ShellExec,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["io.popen"],
        sink_type: TaintSinkType::ShellExec,
    },
    AstSinkPattern {
        call_names: &["loadstring", "load", "dofile", "loadfile"],
        member_patterns: &[],
        sink_type: TaintSinkType::CodeEval,
    },
];
2575
/// Lua sanitizer patterns: `tonumber` only.
static LUA_AST_SANITIZERS: &[AstSanitizerPattern] = &[AstSanitizerPattern {
    call_names: &["tonumber"],
    member_patterns: &[],
    sanitizer_type: SanitizerType::Numeric,
}];
2581
/// Elixir taint-source patterns: `IO.gets`, `System.get_env` and `File.read`
/// (with and without the `!` suffix).
static ELIXIR_AST_SOURCES: &[AstSourcePattern] = &[
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["IO.gets"],
        source_type: TaintSourceType::UserInput,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["System.get_env"],
        source_type: TaintSourceType::EnvVar,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["File.read", "File.read!"],
        source_type: TaintSourceType::FileRead,
    },
];
2599
/// Elixir taint-sink patterns: `System.cmd`, `Code.eval_string` and raw Ecto
/// SQL queries.
static ELIXIR_AST_SINKS: &[AstSinkPattern] = &[
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["System.cmd"],
        sink_type: TaintSinkType::ShellExec,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["Code.eval_string"],
        sink_type: TaintSinkType::CodeEval,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["Ecto.Adapters.SQL.query"],
        sink_type: TaintSinkType::SqlQuery,
    },
];
2617
/// Elixir sanitizer patterns: `String.to_integer` / `String.to_float` and
/// `Phoenix.HTML.html_escape`.
static ELIXIR_AST_SANITIZERS: &[AstSanitizerPattern] = &[
    AstSanitizerPattern {
        call_names: &[],
        member_patterns: &["String.to_integer", "String.to_float"],
        sanitizer_type: SanitizerType::Numeric,
    },
    AstSanitizerPattern {
        call_names: &[],
        member_patterns: &["Phoenix.HTML.html_escape"],
        sanitizer_type: SanitizerType::Html,
    },
];
2630
/// OCaml taint-source patterns: `read_line`, `input_line`, `Sys.getenv` and
/// `In_channel` whole-file reads.
static OCAML_AST_SOURCES: &[AstSourcePattern] = &[
    AstSourcePattern {
        call_names: &["read_line"],
        member_patterns: &[],
        source_type: TaintSourceType::UserInput,
    },
    AstSourcePattern {
        call_names: &["input_line"],
        member_patterns: &[],
        source_type: TaintSourceType::UserInput,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["Sys.getenv"],
        source_type: TaintSourceType::EnvVar,
    },
    AstSourcePattern {
        call_names: &[],
        member_patterns: &["In_channel.read_all", "In_channel.input_all"],
        source_type: TaintSourceType::FileRead,
    },
];
2653
/// OCaml taint-sink patterns: `Sys.command`, `Unix.execvp` and `Sqlite3.exec`.
static OCAML_AST_SINKS: &[AstSinkPattern] = &[
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["Sys.command"],
        sink_type: TaintSinkType::ShellExec,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["Unix.execvp"],
        sink_type: TaintSinkType::ShellExec,
    },
    AstSinkPattern {
        call_names: &[],
        member_patterns: &["Sqlite3.exec"],
        sink_type: TaintSinkType::SqlQuery,
    },
];
2671
/// OCaml sanitizer patterns returned by `get_ast_patterns` for
/// `Language::Ocaml`: the numeric conversions `int_of_string` and
/// `float_of_string`, matched by call name.
static OCAML_AST_SANITIZERS: &[AstSanitizerPattern] = &[AstSanitizerPattern {
    call_names: &["int_of_string", "float_of_string"],
    member_patterns: &[],
    sanitizer_type: SanitizerType::Numeric,
}];
2677
2678fn get_ast_patterns(language: Language) -> AstLanguagePatterns {
2680 match language {
2681 Language::Python => AstLanguagePatterns {
2682 sources: PYTHON_AST_SOURCES,
2683 sinks: PYTHON_AST_SINKS,
2684 sanitizers: PYTHON_AST_SANITIZERS,
2685 },
2686 Language::TypeScript | Language::JavaScript => AstLanguagePatterns {
2687 sources: TYPESCRIPT_AST_SOURCES,
2688 sinks: TYPESCRIPT_AST_SINKS,
2689 sanitizers: TYPESCRIPT_AST_SANITIZERS,
2690 },
2691 Language::Go => AstLanguagePatterns {
2692 sources: GO_AST_SOURCES,
2693 sinks: GO_AST_SINKS,
2694 sanitizers: GO_AST_SANITIZERS,
2695 },
2696 Language::Java => AstLanguagePatterns {
2697 sources: JAVA_AST_SOURCES,
2698 sinks: JAVA_AST_SINKS,
2699 sanitizers: JAVA_AST_SANITIZERS,
2700 },
2701 Language::Rust => AstLanguagePatterns {
2702 sources: RUST_AST_SOURCES,
2703 sinks: RUST_AST_SINKS,
2704 sanitizers: RUST_AST_SANITIZERS,
2705 },
2706 Language::C => AstLanguagePatterns {
2707 sources: C_AST_SOURCES,
2708 sinks: C_AST_SINKS,
2709 sanitizers: C_AST_SANITIZERS,
2710 },
2711 Language::Cpp => AstLanguagePatterns {
2712 sources: CPP_AST_SOURCES,
2713 sinks: CPP_AST_SINKS,
2714 sanitizers: CPP_AST_SANITIZERS,
2715 },
2716 Language::Ruby => AstLanguagePatterns {
2717 sources: RUBY_AST_SOURCES,
2718 sinks: RUBY_AST_SINKS,
2719 sanitizers: RUBY_AST_SANITIZERS,
2720 },
2721 Language::Kotlin => AstLanguagePatterns {
2722 sources: KOTLIN_AST_SOURCES,
2723 sinks: KOTLIN_AST_SINKS,
2724 sanitizers: KOTLIN_AST_SANITIZERS,
2725 },
2726 Language::Swift => AstLanguagePatterns {
2727 sources: SWIFT_AST_SOURCES,
2728 sinks: SWIFT_AST_SINKS,
2729 sanitizers: SWIFT_AST_SANITIZERS,
2730 },
2731 Language::CSharp => AstLanguagePatterns {
2732 sources: CSHARP_AST_SOURCES,
2733 sinks: CSHARP_AST_SINKS,
2734 sanitizers: CSHARP_AST_SANITIZERS,
2735 },
2736 Language::Scala => AstLanguagePatterns {
2737 sources: SCALA_AST_SOURCES,
2738 sinks: SCALA_AST_SINKS,
2739 sanitizers: SCALA_AST_SANITIZERS,
2740 },
2741 Language::Php => AstLanguagePatterns {
2742 sources: PHP_AST_SOURCES,
2743 sinks: PHP_AST_SINKS,
2744 sanitizers: PHP_AST_SANITIZERS,
2745 },
2746 Language::Lua | Language::Luau => AstLanguagePatterns {
2747 sources: LUA_AST_SOURCES,
2748 sinks: LUA_AST_SINKS,
2749 sanitizers: LUA_AST_SANITIZERS,
2750 },
2751 Language::Elixir => AstLanguagePatterns {
2752 sources: ELIXIR_AST_SOURCES,
2753 sinks: ELIXIR_AST_SINKS,
2754 sanitizers: ELIXIR_AST_SANITIZERS,
2755 },
2756 Language::Ocaml => AstLanguagePatterns {
2757 sources: OCAML_AST_SOURCES,
2758 sinks: OCAML_AST_SINKS,
2759 sanitizers: OCAML_AST_SANITIZERS,
2760 },
2761 }
2762}
2763
2764pub fn detect_sources_ast(
2783 root: &tree_sitter::Node,
2784 source: &[u8],
2785 language: Language,
2786 line_filter: Option<u32>,
2787) -> Vec<TaintSource> {
2788 let patterns = get_ast_patterns(language);
2789 let mut sources = Vec::new();
2790 let descendants = walk_descendants(*root);
2791
2792 for descendant in &descendants {
2793 if is_in_comment(descendant, language) || is_in_string(descendant, language) {
2795 continue;
2796 }
2797
2798 let line = descendant.start_position().row as u32 + 1;
2799 if let Some(filter) = line_filter {
2800 if line != filter {
2801 continue;
2802 }
2803 }
2804
2805 let text = node_text(descendant, source);
2806
2807 for pattern in patterns.sources {
2808 let matched = pattern.call_names.iter().any(|name| {
2809 let call_kinds = call_node_kinds(language);
2811 if call_kinds.contains(&descendant.kind()) {
2812 if let Some(call_name) = extract_call_name(descendant, source, language) {
2813 return call_name == *name || call_name.ends_with(&format!(".{}", name));
2814 }
2815 }
2816 false
2817 }) || pattern.member_patterns.iter().any(|mp| text.contains(mp));
2818
2819 if matched {
2820 let var = find_parent_assignment_var(descendant, source, language).or_else(|| {
2822 extract_assigned_var(
2823 std::str::from_utf8(source)
2824 .unwrap_or("")
2825 .lines()
2826 .nth((line - 1) as usize)
2827 .unwrap_or(""),
2828 )
2829 });
2830
2831 if let Some(var) = var {
2832 sources.push(TaintSource {
2833 var,
2834 line,
2835 source_type: pattern.source_type,
2836 statement: Some(
2837 std::str::from_utf8(source)
2838 .unwrap_or("")
2839 .lines()
2840 .nth((line - 1) as usize)
2841 .unwrap_or("")
2842 .to_string(),
2843 ),
2844 });
2845 break; }
2847 }
2848 }
2849 }
2850
2851 sources
2852}
2853
2854pub fn detect_sinks_ast(
2858 root: &tree_sitter::Node,
2859 source: &[u8],
2860 language: Language,
2861 line_filter: Option<u32>,
2862) -> Vec<TaintSink> {
2863 let patterns = get_ast_patterns(language);
2864 let mut sinks = Vec::new();
2865 let descendants = walk_descendants(*root);
2866
2867 for descendant in &descendants {
2868 if is_in_comment(descendant, language) || is_in_string(descendant, language) {
2869 continue;
2870 }
2871
2872 let line = descendant.start_position().row as u32 + 1;
2873 if let Some(filter) = line_filter {
2874 if line != filter {
2875 continue;
2876 }
2877 }
2878
2879 let text = node_text(descendant, source);
2880
2881 for pattern in patterns.sinks {
2882 let matched = pattern.call_names.iter().any(|name| {
2883 let call_kinds = call_node_kinds(language);
2884 if call_kinds.contains(&descendant.kind()) {
2885 if let Some(call_name) = extract_call_name(descendant, source, language) {
2886 return call_name == *name || call_name.ends_with(&format!(".{}", name));
2887 }
2888 }
2889 false
2890 }) || pattern.member_patterns.iter().any(|mp| text.contains(mp));
2891
2892 if matched {
2893 let stmt_text = std::str::from_utf8(source)
2894 .unwrap_or("")
2895 .lines()
2896 .nth((line - 1) as usize)
2897 .unwrap_or("");
2898
2899 let regex_patterns = get_patterns(language);
2901 let var = regex_patterns
2902 .sinks
2903 .iter()
2904 .find(|(p, _)| p.is_match(stmt_text))
2905 .and_then(|(p, _)| extract_call_arg(stmt_text, p))
2906 .or_else(|| {
2907 regex_patterns
2908 .sinks
2909 .iter()
2910 .find(|(p, _)| p.is_match(stmt_text))
2911 .and_then(|(p, _)| extract_sink_var_from_statement(stmt_text, p))
2912 });
2913
2914 if let Some(var) = var {
2915 sinks.push(TaintSink {
2916 var,
2917 line,
2918 sink_type: pattern.sink_type,
2919 tainted: false,
2920 statement: Some(stmt_text.to_string()),
2921 });
2922 break;
2923 }
2924 }
2925 }
2926 }
2927
2928 sinks
2929}
2930
2931pub fn detect_sanitizer_ast(
2936 root: &tree_sitter::Node,
2937 source: &[u8],
2938 language: Language,
2939 line: u32,
2940) -> Option<SanitizerType> {
2941 let patterns = get_ast_patterns(language);
2942 let descendants = walk_descendants(*root);
2943
2944 for descendant in &descendants {
2945 if is_in_comment(descendant, language) || is_in_string(descendant, language) {
2946 continue;
2947 }
2948
2949 let node_line = descendant.start_position().row as u32 + 1;
2950 if node_line != line {
2951 continue;
2952 }
2953
2954 let text = node_text(descendant, source);
2955
2956 for pattern in patterns.sanitizers {
2957 let matched = pattern.call_names.iter().any(|name| {
2958 let call_kinds = call_node_kinds(language);
2959 if call_kinds.contains(&descendant.kind()) {
2960 if let Some(call_name) = extract_call_name(descendant, source, language) {
2961 return call_name == *name;
2962 }
2963 }
2964 false
2965 }) || pattern.member_patterns.iter().any(|mp| text.contains(mp));
2966
2967 if matched {
2968 return Some(pattern.sanitizer_type);
2969 }
2970 }
2971 }
2972
2973 None
2974}
2975
2976pub fn compute_taint_with_tree(
2985 cfg: &CfgInfo,
2986 refs: &[VarRef],
2987 statements: &HashMap<u32, String>,
2988 tree: Option<&tree_sitter::Tree>,
2989 source: Option<&[u8]>,
2990 language: Language,
2991) -> Result<TaintInfo, TldrError> {
2992 validate_cfg(cfg)?;
2999
3000 let mut result = TaintInfo::new(&cfg.function);
3001
3002 let predecessors = build_predecessors(cfg);
3004 let successors = build_successors(cfg);
3005 let line_to_block = build_line_to_block(cfg);
3006 let refs_by_block = build_refs_by_block(refs, &line_to_block);
3007
3008 if let (Some(tree), Some(src)) = (tree, source) {
3010 let root = tree.root_node();
3014
3015 let all_ast_sources = detect_sources_ast(&root, src, language, None);
3016 let all_ast_sinks = detect_sinks_ast(&root, src, language, None);
3017
3018 let mut ast_sources_by_line: HashMap<u32, Vec<TaintSource>> = HashMap::new();
3020 for s in all_ast_sources {
3021 ast_sources_by_line.entry(s.line).or_default().push(s);
3022 }
3023 let mut ast_sinks_by_line: HashMap<u32, Vec<TaintSink>> = HashMap::new();
3024 for s in all_ast_sinks {
3025 ast_sinks_by_line.entry(s.line).or_default().push(s);
3026 }
3027
3028 for (&line, stmt) in statements {
3029 if let Some(sources) = ast_sources_by_line.remove(&line) {
3031 result.sources.extend(sources);
3032 } else {
3033 result.sources.extend(detect_sources(stmt, line, language));
3034 }
3035
3036 if let Some(sinks) = ast_sinks_by_line.remove(&line) {
3040 result.sinks.extend(sinks);
3041 }
3042 result.sinks.extend(detect_sinks(stmt, line, language));
3043 }
3044 } else {
3045 for (&line, stmt) in statements {
3047 result.sources.extend(detect_sources(stmt, line, language));
3048 result.sinks.extend(detect_sinks(stmt, line, language));
3049 }
3050 }
3051
3052 result.sources.sort_by(|a, b| {
3054 a.line
3055 .cmp(&b.line)
3056 .then_with(|| format!("{:?}", a.source_type).cmp(&format!("{:?}", b.source_type)))
3057 .then_with(|| a.var.cmp(&b.var))
3058 });
3059 result.sources.dedup_by(|a, b| {
3060 a.line == b.line
3061 && a.var == b.var
3062 && std::mem::discriminant(&a.source_type) == std::mem::discriminant(&b.source_type)
3063 });
3064
3065 result.sinks.sort_by(|a, b| {
3067 a.line
3068 .cmp(&b.line)
3069 .then_with(|| format!("{:?}", a.sink_type).cmp(&format!("{:?}", b.sink_type)))
3070 .then_with(|| a.var.cmp(&b.var))
3071 });
3072 result.sinks.dedup_by(|a, b| {
3073 a.line == b.line
3074 && a.var == b.var
3075 && std::mem::discriminant(&a.sink_type) == std::mem::discriminant(&b.sink_type)
3076 });
3077
3078 let block_ids: Vec<usize> = cfg.blocks.iter().map(|b| b.id).collect();
3082 let mut tainted: HashMap<usize, HashSet<String>> = HashMap::new();
3083 for &bid in &block_ids {
3084 tainted.insert(bid, HashSet::new());
3085 }
3086
3087 for source in &result.sources {
3088 if let Some(&block_id) = line_to_block.get(&source.line) {
3089 tainted
3090 .entry(block_id)
3091 .or_default()
3092 .insert(source.var.clone());
3093 }
3094 }
3095
3096 let unique_vars: HashSet<&str> = refs.iter().map(|r| r.name.as_str()).collect();
3099 let computed_max = block_ids.len() * unique_vars.len().max(1) + 10;
3100 let max_iterations = computed_max.min(MAX_TAINT_ITERATIONS);
3101 let mut worklist: VecDeque<usize> = block_ids.iter().cloned().collect();
3102 let mut iterations = 0;
3103 let mut iteration_limit_reached = false;
3104
3105 let mut source_vars_by_block: HashMap<usize, HashSet<String>> = HashMap::new();
3106 for source in &result.sources {
3107 if let Some(&block_id) = line_to_block.get(&source.line) {
3108 source_vars_by_block
3109 .entry(block_id)
3110 .or_default()
3111 .insert(source.var.clone());
3112 }
3113 }
3114
3115 while let Some(block_id) = worklist.pop_front() {
3116 if iterations >= max_iterations {
3117 iteration_limit_reached = true;
3118 break;
3119 }
3120 iterations += 1;
3121
3122 let mut taint_in: HashSet<String> = predecessors
3123 .get(&block_id)
3124 .map(|preds| {
3125 preds
3126 .iter()
3127 .flat_map(|p| tainted.get(p).cloned().unwrap_or_default())
3128 .collect()
3129 })
3130 .unwrap_or_default();
3131
3132 if let Some(source_vars) = source_vars_by_block.get(&block_id) {
3133 taint_in.extend(source_vars.clone());
3134 }
3135
3136 let taint_out = process_block(
3137 block_id,
3138 taint_in,
3139 &refs_by_block,
3140 statements,
3141 &line_to_block,
3142 &mut result.sanitized_vars,
3143 language,
3144 );
3145
3146 let old_taint = tainted.get(&block_id).cloned().unwrap_or_default();
3147 if taint_out != old_taint {
3148 tainted.insert(block_id, taint_out);
3149 if let Some(succs) = successors.get(&block_id) {
3150 for &s in succs {
3151 if !worklist.contains(&s) {
3152 worklist.push_back(s);
3153 }
3154 }
3155 }
3156 }
3157 }
3158
3159 if iteration_limit_reached {
3160 result.convergence = Some("iteration_limit_reached".to_string());
3161 }
3162
3163 result.tainted_vars = tainted.clone();
3164
3165 for sink in &mut result.sinks {
3167 if let Some(&sink_block) = line_to_block.get(&sink.line) {
3168 if let Some(tainted_at_block) = tainted.get(&sink_block) {
3169 if tainted_at_block.contains(&sink.var) {
3171 sink.tainted = true;
3172 } else if !tainted_at_block.is_empty() {
3173 if let Some(block) = cfg.blocks.iter().find(|b| b.id == sink_block) {
3178 let block_text: String = (block.lines.0..=block.lines.1)
3179 .filter_map(|l| statements.get(&l))
3180 .map(|s| s.as_str())
3181 .collect::<Vec<_>>()
3182 .join(" ");
3183 for tvar in tainted_at_block {
3184 if identifier_in_text(&block_text, tvar) {
3185 sink.tainted = true;
3186 break;
3187 }
3188 }
3189 }
3190 }
3191 }
3192 }
3193 }
3194
3195 let sources_clone = result.sources.clone();
3196 let sinks_snapshot: Vec<(String, u32, TaintSinkType, bool, Option<String>)> = result
3197 .sinks
3198 .iter()
3199 .map(|s| {
3200 (
3201 s.var.clone(),
3202 s.line,
3203 s.sink_type,
3204 s.tainted,
3205 s.statement.clone(),
3206 )
3207 })
3208 .collect();
3209
3210 for (sink_var, sink_line, sink_type, sink_tainted, sink_statement) in sinks_snapshot {
3211 if !sink_tainted {
3212 continue;
3213 }
3214
3215 if let Some(&sink_block) = line_to_block.get(&sink_line) {
3216 for source in &sources_clone {
3217 if let Some(&source_block) = line_to_block.get(&source.line) {
3218 if flows_to(&source.var, &sink_var, &tainted, &predecessors, sink_block) {
3219 let is_sanitized = result.sanitized_vars.contains(&sink_var);
3220 if !is_sanitized {
3221 let path = compute_flow_path(source_block, sink_block, &successors);
3222 let flow = TaintFlow {
3223 source: source.clone(),
3224 sink: TaintSink {
3225 var: sink_var.clone(),
3226 line: sink_line,
3227 sink_type,
3228 tainted: true,
3229 statement: sink_statement.clone(),
3230 },
3231 path,
3232 };
3233 result.flows.push(flow);
3234 }
3235 }
3236 }
3237 }
3238 }
3239 }
3240
3241 Ok(result)
3242}
3243
/// Reports whether `target_var` is in the taint set recorded for
/// `target_block`.
///
/// `_source_var` and `_predecessors` are accepted but not consulted, so the
/// answer does not depend on which source is being asked about. A block with
/// no recorded taint set yields `false`.
fn flows_to(
    _source_var: &str,
    target_var: &str,
    tainted_vars: &HashMap<usize, HashSet<String>>,
    _predecessors: &HashMap<usize, Vec<usize>>,
    target_block: usize,
) -> bool {
    match tainted_vars.get(&target_block) {
        Some(block_taint) => block_taint.contains(target_var),
        None => false,
    }
}
3280
/// Finds a shortest block path from `source_block` to `sink_block`.
///
/// Performs a breadth-first search over `successors`. Successors are visited
/// in the order they are listed, so among equally short paths the one through
/// the earliest-listed successors is returned.
///
/// Returns
/// * `[source_block]` when source and sink are the same block,
/// * the discovered path (both endpoints included) when the sink is reachable,
/// * `[source_block, sink_block]` as a fallback when it is not.
fn compute_flow_path(
    source_block: usize,
    sink_block: usize,
    successors: &HashMap<usize, Vec<usize>>,
) -> Vec<usize> {
    if source_block == sink_block {
        return vec![source_block];
    }

    // `parent[b]` is the block from which `b` was first reached. Recording
    // parents instead of queueing whole paths avoids cloning a path vector
    // for every visited block.
    let mut parent: HashMap<usize, usize> = HashMap::new();
    let mut visited: HashSet<usize> = HashSet::new();
    let mut queue: VecDeque<usize> = VecDeque::new();

    visited.insert(source_block);
    queue.push_back(source_block);

    while let Some(current) = queue.pop_front() {
        let succs = match successors.get(&current) {
            Some(succs) => succs,
            None => continue,
        };

        for &next in succs {
            if next == sink_block {
                // Walk the parent chain back to the source (which has no
                // parent entry), then flip it into source-to-sink order.
                let mut path = vec![next, current];
                let mut cursor = current;
                while let Some(&prev) = parent.get(&cursor) {
                    path.push(prev);
                    cursor = prev;
                }
                path.reverse();
                return path;
            }

            // `insert` returns true only the first time a block is seen.
            if visited.insert(next) {
                parent.insert(next, current);
                queue.push_back(next);
            }
        }
    }

    // Sink not reachable through `successors`: report the endpoints only.
    vec![source_block, sink_block]
}
3335
3336pub fn compute_taint(
3367 cfg: &CfgInfo,
3368 refs: &[VarRef],
3369 statements: &HashMap<u32, String>,
3370 language: Language,
3371) -> Result<TaintInfo, TldrError> {
3372 validate_cfg(cfg)?;
3374
3375 let mut result = TaintInfo::new(&cfg.function);
3376
3377 let predecessors = build_predecessors(cfg);
3379 let successors = build_successors(cfg);
3380 let line_to_block = build_line_to_block(cfg);
3381 let refs_by_block = build_refs_by_block(refs, &line_to_block);
3382
3383 for (&line, stmt) in statements {
3385 for source in detect_sources(stmt, line, language) {
3386 result.sources.push(source);
3387 }
3388 for sink in detect_sinks(stmt, line, language) {
3389 result.sinks.push(sink);
3390 }
3391 }
3392
3393 let block_ids: Vec<usize> = cfg.blocks.iter().map(|b| b.id).collect();
3395 let mut tainted: HashMap<usize, HashSet<String>> = HashMap::new();
3396 for &bid in &block_ids {
3397 tainted.insert(bid, HashSet::new());
3398 }
3399
3400 for source in &result.sources {
3402 if let Some(&block_id) = line_to_block.get(&source.line) {
3403 tainted
3404 .entry(block_id)
3405 .or_default()
3406 .insert(source.var.clone());
3407 }
3408 }
3409
3410 let unique_vars: HashSet<&str> = refs.iter().map(|r| r.name.as_str()).collect();
3413 let computed_max = block_ids.len() * unique_vars.len().max(1) + 10;
3414 let max_iterations = computed_max.min(MAX_TAINT_ITERATIONS);
3415 let mut worklist: VecDeque<usize> = block_ids.iter().cloned().collect();
3416 let mut iterations = 0;
3417 let mut iteration_limit_reached = false;
3418
3419 let mut source_vars_by_block: HashMap<usize, HashSet<String>> = HashMap::new();
3421 for source in &result.sources {
3422 if let Some(&block_id) = line_to_block.get(&source.line) {
3423 source_vars_by_block
3424 .entry(block_id)
3425 .or_default()
3426 .insert(source.var.clone());
3427 }
3428 }
3429
3430 while let Some(block_id) = worklist.pop_front() {
3431 if iterations >= max_iterations {
3432 iteration_limit_reached = true;
3433 break; }
3435 iterations += 1;
3436
3437 let mut taint_in: HashSet<String> = predecessors
3439 .get(&block_id)
3440 .map(|preds| {
3441 preds
3442 .iter()
3443 .flat_map(|p| tainted.get(p).cloned().unwrap_or_default())
3444 .collect()
3445 })
3446 .unwrap_or_default();
3447
3448 if let Some(source_vars) = source_vars_by_block.get(&block_id) {
3450 taint_in.extend(source_vars.clone());
3451 }
3452
3453 let taint_out = process_block(
3455 block_id,
3456 taint_in,
3457 &refs_by_block,
3458 statements,
3459 &line_to_block,
3460 &mut result.sanitized_vars,
3461 language,
3462 );
3463
3464 let old_taint = tainted.get(&block_id).cloned().unwrap_or_default();
3466 if taint_out != old_taint {
3467 tainted.insert(block_id, taint_out);
3468 if let Some(succs) = successors.get(&block_id) {
3469 for &s in succs {
3470 if !worklist.contains(&s) {
3471 worklist.push_back(s);
3472 }
3473 }
3474 }
3475 }
3476 }
3477
3478 if iteration_limit_reached {
3479 result.convergence = Some("iteration_limit_reached".to_string());
3480 }
3481
3482 result.tainted_vars = tainted.clone();
3483
3484 for sink in &mut result.sinks {
3491 if let Some(&sink_block) = line_to_block.get(&sink.line) {
3493 if let Some(tainted_at_block) = tainted.get(&sink_block) {
3495 if tainted_at_block.contains(&sink.var) {
3496 sink.tainted = true;
3497 }
3498 }
3499 }
3500 }
3501
3502 let sources_clone = result.sources.clone();
3505 let sinks_snapshot: Vec<(String, u32, TaintSinkType, bool, Option<String>)> = result
3506 .sinks
3507 .iter()
3508 .map(|s| {
3509 (
3510 s.var.clone(),
3511 s.line,
3512 s.sink_type,
3513 s.tainted,
3514 s.statement.clone(),
3515 )
3516 })
3517 .collect();
3518
3519 for (sink_var, sink_line, sink_type, sink_tainted, sink_statement) in sinks_snapshot {
3520 if !sink_tainted {
3521 continue;
3522 }
3523
3524 if let Some(&sink_block) = line_to_block.get(&sink_line) {
3526 for source in &sources_clone {
3528 if let Some(&source_block) = line_to_block.get(&source.line) {
3530 if flows_to(&source.var, &sink_var, &tainted, &predecessors, sink_block) {
3532 let is_sanitized = result.sanitized_vars.contains(&sink_var);
3534
3535 if !is_sanitized {
3537 let path = compute_flow_path(source_block, sink_block, &successors);
3538
3539 let flow = TaintFlow {
3540 source: source.clone(),
3541 sink: TaintSink {
3542 var: sink_var.clone(),
3543 line: sink_line,
3544 sink_type,
3545 tainted: true,
3546 statement: sink_statement.clone(),
3547 },
3548 path,
3549 };
3550
3551 result.flows.push(flow);
3552 }
3553 }
3554 }
3555 }
3556 }
3557 }
3558
3559 Ok(result)
3560}
3561
3562fn process_block(
3583 block_id: usize,
3584 mut current_taint: HashSet<String>,
3585 refs_by_block: &HashMap<usize, Vec<&VarRef>>,
3586 statements: &HashMap<u32, String>,
3587 _line_to_block: &HashMap<u32, usize>,
3588 sanitized_vars: &mut HashSet<String>,
3589 language: Language,
3590) -> HashSet<String> {
3591 let empty_refs = vec![];
3592 let block_refs = refs_by_block.get(&block_id).unwrap_or(&empty_refs);
3593
3594 for var_ref in block_refs {
3595 let stmt = statements
3596 .get(&var_ref.line)
3597 .map(|s| s.as_str())
3598 .unwrap_or("");
3599
3600 match var_ref.ref_type {
3601 RefType::Definition => {
3602 let rhs_tainted = current_taint.iter().any(|tv| stmt.contains(tv.as_str()));
3604
3605 if detect_sanitizer(stmt, language).is_some() {
3607 sanitized_vars.insert(var_ref.name.clone());
3608 current_taint.remove(&var_ref.name);
3609 } else if rhs_tainted {
3610 current_taint.insert(var_ref.name.clone());
3611 } else {
3612 current_taint.remove(&var_ref.name);
3614 }
3615 }
3616 RefType::Use => {
3617 }
3619 RefType::Update => {
3620 let rhs_tainted = current_taint.iter().any(|tv| stmt.contains(tv.as_str()));
3623 if rhs_tainted {
3624 current_taint.insert(var_ref.name.clone());
3625 }
3626 }
3627 }
3628 }
3629
3630 current_taint
3631}
3632
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ast::ParserPool;
    use crate::types::{BlockType, CfgBlock, CfgEdge, CfgInfo, EdgeType, RefType, VarRef};

    // ---- fixture helpers -------------------------------------------------

    /// Builds a CFG block covering the inclusive line range `lines`.
    fn block(id: usize, block_type: BlockType, lines: (u32, u32), calls: &[&str]) -> CfgBlock {
        CfgBlock {
            id,
            block_type,
            lines,
            calls: calls.iter().map(|c| c.to_string()).collect(),
        }
    }

    /// Builds an unconditioned edge of the given type.
    fn edge(from: usize, to: usize, edge_type: EdgeType) -> CfgEdge {
        CfgEdge {
            from,
            to,
            edge_type,
            condition: None,
        }
    }

    /// Builds a definition reference at column 0 of `line`.
    fn definition(name: &str, line: u32) -> VarRef {
        VarRef {
            name: name.to_string(),
            ref_type: RefType::Definition,
            line,
            column: 0,
            context: None,
            group_id: None,
        }
    }

    /// Maps 1-based line numbers to the lines of `code`.
    fn numbered_lines(code: &str) -> HashMap<u32, String> {
        code.lines()
            .enumerate()
            .map(|(idx, text)| ((idx + 1) as u32, text.to_string()))
            .collect()
    }

    /// CFG with an entry block on line 3 flowing into one body block.
    fn entry_then_body_cfg(function: &str, body_lines: (u32, u32), calls: &[&str]) -> CfgInfo {
        CfgInfo {
            function: function.to_string(),
            blocks: vec![
                block(0, BlockType::Entry, (3, 3), &[]),
                block(1, BlockType::Body, body_lines, calls),
            ],
            edges: vec![edge(0, 1, EdgeType::Unconditional)],
            entry_block: 0,
            exit_blocks: vec![1],
            cyclomatic_complexity: 1,
            nested_functions: HashMap::new(),
        }
    }

    /// Parses `code` as Python and runs the tree-aware analysis on it.
    fn analyze_python(cfg: &CfgInfo, refs: &[VarRef], code: &str) -> TaintInfo {
        let statements = numbered_lines(code);
        let pool = ParserPool::new();
        let tree = pool.parse(code, Language::Python).ok();

        compute_taint_with_tree(
            cfg,
            refs,
            &statements,
            tree.as_ref(),
            Some(code.as_bytes()),
            Language::Python,
        )
        .unwrap()
    }

    // ---- serde round-trips -----------------------------------------------

    #[test]
    fn test_taint_source_type_serde() {
        let original = TaintSourceType::UserInput;
        let encoded = serde_json::to_string(&original).unwrap();
        assert_eq!(encoded, "\"user_input\"");

        let decoded: TaintSourceType = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded, original);
    }

    #[test]
    fn test_taint_sink_type_serde() {
        let original = TaintSinkType::SqlQuery;
        let encoded = serde_json::to_string(&original).unwrap();
        assert_eq!(encoded, "\"sql_query\"");

        let decoded: TaintSinkType = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded, original);
    }

    #[test]
    fn test_sanitizer_type_serde() {
        let original = SanitizerType::Numeric;
        let encoded = serde_json::to_string(&original).unwrap();
        assert_eq!(encoded, "\"numeric\"");

        let decoded: SanitizerType = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded, original);
    }

    // ---- TaintInfo basics --------------------------------------------------

    #[test]
    fn test_taint_info_new() {
        let info = TaintInfo::new("my_function");
        assert_eq!(info.function_name, "my_function");
        assert!(info.tainted_vars.is_empty());
        assert!(info.sources.is_empty());
        assert!(info.sinks.is_empty());
        assert!(info.flows.is_empty());
        assert!(info.sanitized_vars.is_empty());
    }

    #[test]
    fn test_taint_info_default() {
        let info = TaintInfo::default();
        assert!(info.function_name.is_empty());
        assert!(info.tainted_vars.is_empty());
    }

    #[test]
    fn test_taint_info_is_tainted() {
        let mut info = TaintInfo::new("test");
        let mut block_taint = HashSet::new();
        block_taint.insert("user_input".to_string());
        info.tainted_vars.insert(0, block_taint);

        assert!(info.is_tainted(0, "user_input"));
        assert!(!info.is_tainted(0, "other_var"));
        // Block 1 has no taint set at all.
        assert!(!info.is_tainted(1, "user_input"));
    }

    #[test]
    fn test_taint_info_get_vulnerabilities() {
        let mut info = TaintInfo::new("test");

        // One tainted sink and one clean sink; only the first is reported.
        info.sinks.extend(vec![
            TaintSink {
                var: "query".to_string(),
                line: 5,
                sink_type: TaintSinkType::SqlQuery,
                tainted: true,
                statement: Some("cursor.execute(query)".to_string()),
            },
            TaintSink {
                var: "safe_query".to_string(),
                line: 10,
                sink_type: TaintSinkType::SqlQuery,
                tainted: false,
                statement: Some("cursor.execute(safe_query)".to_string()),
            },
        ]);

        let vulns = info.get_vulnerabilities();
        assert_eq!(vulns.len(), 1);
        assert_eq!(vulns[0].var, "query");
    }

    // ---- termination -------------------------------------------------------

    /// 50 chained blocks with a back edge every fifth block must still
    /// terminate quickly.
    #[test]
    fn test_taint_terminates_on_large_cfg_with_backedges() {
        let num_blocks: usize = 50;

        // Block `i` spans lines `10 * i + 1 ..= 10 * i + 10`.
        let blocks: Vec<CfgBlock> = (0..num_blocks)
            .map(|i| {
                let first_line = (i * 10 + 1) as u32;
                let last_line = (i * 10 + 10) as u32;
                block(i, BlockType::Body, (first_line, last_line), &[])
            })
            .collect();

        // Straight-line chain first, then the back edges.
        let chain = (0..num_blocks - 1).map(|i| edge(i, i + 1, EdgeType::Unconditional));
        let back_edges = (5..num_blocks)
            .step_by(5)
            .map(|i| edge(i, i - 3, EdgeType::BackEdge));
        let edges: Vec<CfgEdge> = chain.chain(back_edges).collect();

        let cfg = CfgInfo {
            function: "large_func".to_string(),
            blocks,
            edges,
            entry_block: 0,
            exit_blocks: vec![num_blocks - 1],
            cyclomatic_complexity: 10,
            nested_functions: HashMap::new(),
        };

        // One definition per block; each variable copies the previous one and
        // the first one reads from `input()`.
        let refs: Vec<VarRef> = (0..num_blocks)
            .map(|i| definition(&format!("var_{}", i), (i * 10 + 1) as u32))
            .collect();
        let statements: HashMap<u32, String> = (0..num_blocks)
            .map(|i| {
                let text = if i > 0 {
                    format!("var_{} = var_{}", i, i - 1)
                } else {
                    "var_0 = input()".to_string()
                };
                ((i * 10 + 1) as u32, text)
            })
            .collect();

        let started = std::time::Instant::now();
        let result = compute_taint(&cfg, &refs, &statements, Language::Python);
        let elapsed = started.elapsed();

        assert!(result.is_ok(), "compute_taint should succeed");
        assert!(
            elapsed.as_secs() < 5,
            "compute_taint took too long: {:?} (possible infinite loop)",
            elapsed
        );

        let info = result.unwrap();
        assert!(!info.sources.is_empty(), "Should detect input() source");
    }

    /// Two mutually reachable blocks and 500 source variables must not run
    /// away.
    #[test]
    fn test_taint_iteration_cap_prevents_runaway() {
        let cfg = CfgInfo {
            function: "runaway".to_string(),
            blocks: vec![
                block(0, BlockType::Body, (1, 100), &[]),
                block(1, BlockType::Body, (101, 200), &[]),
            ],
            edges: vec![
                edge(0, 1, EdgeType::Unconditional),
                edge(1, 0, EdgeType::BackEdge),
            ],
            entry_block: 0,
            exit_blocks: vec![1],
            cyclomatic_complexity: 2,
            nested_functions: HashMap::new(),
        };

        let refs: Vec<VarRef> = (0..500)
            .map(|i| definition(&format!("v{}", i), (i + 1) as u32))
            .collect();
        let statements: HashMap<u32, String> = (0..500)
            .map(|i| ((i + 1) as u32, format!("v{} = input()", i)))
            .collect();

        let started = std::time::Instant::now();
        let result = compute_taint(&cfg, &refs, &statements, Language::Python);
        let elapsed = started.elapsed();

        assert!(result.is_ok());
        assert!(
            elapsed.as_secs() < 5,
            "Should terminate quickly with iteration cap, took {:?}",
            elapsed
        );
    }

    // ---- AST + regex merge ---------------------------------------------------

    /// Merging AST and regex detection must not report the same source or
    /// sink twice.
    #[test]
    fn test_sources_are_deduplicated() {
        let python_code = r#"import os

def vulnerable_func(user_input):
    data = input("Enter: ")
    query = "SELECT * FROM users WHERE id = " + data
    os.system(user_input)
    eval(data)
"#;

        let cfg = entry_then_body_cfg("vulnerable_func", (4, 7), &["input", "os.system", "eval"]);
        let refs = vec![
            definition("user_input", 3),
            definition("data", 4),
            definition("query", 5),
        ];

        let result = analyze_python(&cfg, &refs, python_code);

        let mut seen_sources = HashSet::new();
        for source in &result.sources {
            let key = (
                source.line,
                std::mem::discriminant(&source.source_type),
                source.var.clone(),
            );
            assert!(
                seen_sources.insert(key),
                "Duplicate source found: line={}, var={}, type={:?}",
                source.line,
                source.var,
                source.source_type
            );
        }

        let mut seen_sinks = HashSet::new();
        for sink in &result.sinks {
            let key = (
                sink.line,
                std::mem::discriminant(&sink.sink_type),
                sink.var.clone(),
            );
            assert!(
                seen_sinks.insert(key),
                "Duplicate sink found: line={}, var={}, type={:?}",
                sink.line,
                sink.var,
                sink.sink_type
            );
        }
    }

    /// Both the shell and the eval sink must survive the merge.
    #[test]
    fn test_sinks_detected_via_merge() {
        let python_code = r#"import os

def vuln(user_input):
    os.system(user_input)
    eval(user_input)
"#;

        let cfg = entry_then_body_cfg("vuln", (4, 5), &["os.system", "eval"]);
        let refs = vec![definition("user_input", 3)];

        let result = analyze_python(&cfg, &refs, python_code);

        let sink_types: Vec<_> = result.sinks.iter().map(|s| s.sink_type).collect();
        assert!(
            sink_types.contains(&TaintSinkType::ShellExec),
            "Should detect os.system as ShellExec sink, got: {:?}",
            sink_types
        );
        assert!(
            sink_types.contains(&TaintSinkType::CodeEval),
            "Should detect eval as CodeEval sink, got: {:?}",
            sink_types
        );
    }
}