use super::types::{
Location, PropagationStep, TaintLabel, TaintPropagation, TaintState, TaintedValue,
};
use crate::callgraph::types::{CallGraph, FunctionRef};
use crate::cfg::types::{BlockId, CFGInfo, EdgeType};
use crate::dfg::types::{DataflowEdge, DataflowKind, DFGInfo};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet, VecDeque};
/// Identifies a node of the tainted dataflow graph by file, line and
/// (optionally) the variable involved.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct NodeId {
    /// File the node belongs to; empty when the node was built from a bare line.
    pub file: String,
    /// Line number of the node.
    pub line: usize,
    /// Variable attached to the node, if any.
    pub variable: Option<String>,
}

impl NodeId {
    /// Builds a node that carries only a line number (empty file, no variable).
    #[inline]
    pub fn from_line(line: usize) -> Self {
        Self::new(String::new(), line)
    }

    /// Builds a node for `file` and `line` without a variable.
    #[inline]
    pub fn new(file: impl Into<String>, line: usize) -> Self {
        NodeId {
            file: file.into(),
            line,
            variable: None,
        }
    }

    /// Builds a node for `file` and `line` that also names `variable`.
    #[inline]
    pub fn with_variable(file: impl Into<String>, line: usize, variable: impl Into<String>) -> Self {
        NodeId {
            variable: Some(variable.into()),
            ..Self::new(file, line)
        }
    }

    /// Converts the node into a [`Location`].
    ///
    /// An empty file name is reported as `"<function>"`; the third argument
    /// passed to `Location::new` is always `1` (presumably the column —
    /// confirm against `Location`).
    pub fn to_location(&self) -> Location {
        let file_label = if self.file.is_empty() {
            "<function>"
        } else {
            self.file.as_str()
        };
        Location::new(file_label, self.line, 1)
    }
}
/// Renders as `file:line` (or `line:N` when the file is empty), followed by
/// `:variable` when a variable is present.
impl std::fmt::Display for NodeId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self.file.as_str() {
            "" => write!(f, "line:{}", self.line)?,
            path => write!(f, "{}:{}", path, self.line)?,
        }
        match &self.variable {
            Some(name) => write!(f, ":{}", name),
            None => Ok(()),
        }
    }
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TaintedEdge {
pub edge: DataflowEdge,
pub propagation: TaintPropagation,
#[serde(skip_serializing_if = "HashSet::is_empty", default)]
pub labels: HashSet<TaintLabel>,
#[serde(default)]
pub sanitized: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub sanitization_method: Option<String>,
}
impl TaintedEdge {
pub fn from_dfg_edge(edge: DataflowEdge) -> Self {
let propagation = Self::infer_propagation(&edge);
Self {
edge,
propagation,
labels: HashSet::new(),
sanitized: false,
sanitization_method: None,
}
}
fn infer_propagation(edge: &DataflowEdge) -> TaintPropagation {
match edge.kind {
DataflowKind::Definition => TaintPropagation::Copy,
DataflowKind::Use => TaintPropagation::Copy,
DataflowKind::Mutation => TaintPropagation::Transform,
DataflowKind::Return => TaintPropagation::CallReturn,
DataflowKind::Param => TaintPropagation::Copy,
DataflowKind::Yield => TaintPropagation::CallReturn,
DataflowKind::ClosureCapture => TaintPropagation::Copy,
DataflowKind::Goroutine => TaintPropagation::Copy,
DataflowKind::ChannelSend | DataflowKind::ChannelReceive => TaintPropagation::Copy,
DataflowKind::Defer => TaintPropagation::Copy,
DataflowKind::TypeAssertion => TaintPropagation::Transform,
DataflowKind::ComprehensionVar
| DataflowKind::LambdaParam
| DataflowKind::NestedParam => TaintPropagation::Copy,
DataflowKind::Global | DataflowKind::Nonlocal => TaintPropagation::Copy,
DataflowKind::Panic | DataflowKind::Recover => TaintPropagation::Transform,
DataflowKind::ErrorAssign | DataflowKind::ErrorCheck => TaintPropagation::Copy,
DataflowKind::NamedReturnModify => TaintPropagation::Transform,
DataflowKind::ChannelMake
| DataflowKind::ChannelCloseDfg
| DataflowKind::SelectReceive
| DataflowKind::SelectSend => TaintPropagation::Copy,
DataflowKind::MutexLock
| DataflowKind::MutexUnlock
| DataflowKind::WaitGroupAdd
| DataflowKind::WaitGroupDone
| DataflowKind::WaitGroupWait
| DataflowKind::OnceDo => TaintPropagation::Copy,
DataflowKind::ContextDone
| DataflowKind::ContextErr
| DataflowKind::ContextValue => TaintPropagation::Copy,
DataflowKind::PoolGet | DataflowKind::PoolPut => TaintPropagation::Copy,
}
}
pub fn mark_sanitized(&mut self, method: impl Into<String>) {
self.sanitized = true;
self.sanitization_method = Some(method.into());
self.propagation = TaintPropagation::Sanitize;
}
pub fn add_labels(&mut self, labels: impl IntoIterator<Item = TaintLabel>) {
self.labels.extend(labels);
}
pub fn is_tainted(&self) -> bool {
!self.labels.is_empty() && !self.sanitized
}
}
/// A dataflow graph plus the taint bookkeeping attached to it: labelled
/// nodes, sources, sinks and sanitization points.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TaintedDFG {
    /// Copy of the dataflow graph this structure was built from.
    pub dfg: DFGInfo,
    /// Labels recorded per node.
    pub taint_labels: HashMap<NodeId, HashSet<TaintLabel>>,
    /// Nodes registered as sanitization points.
    pub sanitization_points: HashSet<NodeId>,
    /// Name of the analysed function (taken from the DFG).
    pub function_name: String,
    /// Path of the file the function lives in.
    pub file_path: String,
    /// Labels introduced at each source line.
    pub source_lines: HashMap<usize, Vec<TaintLabel>>,
    /// Lines registered as sinks.
    pub sink_lines: HashSet<usize>,
    /// Sanitization method recorded per line.
    pub sanitization_methods: HashMap<usize, String>,
    /// One annotated edge per edge of the DFG.
    pub edges: Vec<TaintedEdge>,
}

impl TaintedDFG {
    /// Builds an annotation layer over `dfg` with no taint recorded yet.
    pub fn from_dfg(dfg: &DFGInfo, file_path: impl Into<String>) -> Self {
        let edges: Vec<TaintedEdge> = dfg
            .edges
            .iter()
            .cloned()
            .map(TaintedEdge::from_dfg_edge)
            .collect();
        Self {
            edges,
            function_name: dfg.function_name.clone(),
            file_path: file_path.into(),
            dfg: dfg.clone(),
            taint_labels: HashMap::new(),
            sanitization_points: HashSet::new(),
            source_lines: HashMap::new(),
            sink_lines: HashSet::new(),
            sanitization_methods: HashMap::new(),
        }
    }

    /// Adds `labels` to the label set of `node`, creating the set if needed.
    pub fn mark_tainted(&mut self, node: NodeId, labels: impl IntoIterator<Item = TaintLabel>) {
        let slot = self.taint_labels.entry(node).or_default();
        slot.extend(labels);
    }

    /// Registers `line` as a taint source for `label` and taints the
    /// line-only node for that line.
    pub fn mark_source(&mut self, line: usize, label: TaintLabel) {
        self.source_lines
            .entry(line)
            .or_default()
            .push(label.clone());
        self.mark_tainted(NodeId::from_line(line), std::iter::once(label));
    }

    /// Registers `line` as a sink.
    pub fn mark_sink(&mut self, line: usize) {
        self.sink_lines.insert(line);
    }

    /// Registers `line` as a sanitization point using `method` and marks
    /// every edge that starts or ends on that line as sanitized.
    pub fn mark_sanitization(&mut self, line: usize, method: impl Into<String>) {
        let method_name: String = method.into();
        self.sanitization_points.insert(NodeId::from_line(line));
        self.sanitization_methods.insert(line, method_name.clone());
        self.edges
            .iter_mut()
            .filter(|e| e.edge.to_line == line || e.edge.from_line == line)
            .for_each(|e| e.mark_sanitized(&method_name));
    }

    /// True when `node` has at least one label and is not itself a
    /// registered sanitization point.
    pub fn is_tainted(&self, node: &NodeId) -> bool {
        if self.sanitization_points.contains(node) {
            return false;
        }
        matches!(self.taint_labels.get(node), Some(labels) if !labels.is_empty())
    }

    /// Labels recorded for `node`, or `None` when the node is a sanitization
    /// point or has no entry.
    pub fn get_taint(&self, node: &NodeId) -> Option<&HashSet<TaintLabel>> {
        match self.sanitization_points.contains(node) {
            true => None,
            false => self.taint_labels.get(node),
        }
    }

    /// Edges whose origin is `line`.
    pub fn edges_from(&self, line: usize) -> Vec<&TaintedEdge> {
        let mut outgoing = Vec::new();
        for candidate in &self.edges {
            if candidate.edge.from_line == line {
                outgoing.push(candidate);
            }
        }
        outgoing
    }

    /// Edges whose destination is `line`.
    pub fn edges_to(&self, line: usize) -> Vec<&TaintedEdge> {
        let mut incoming = Vec::new();
        for candidate in &self.edges {
            if candidate.edge.to_line == line {
                incoming.push(candidate);
            }
        }
        incoming
    }

    /// Every node with a non-empty label set that is not a sanitization point.
    pub fn tainted_nodes(&self) -> Vec<(&NodeId, &HashSet<TaintLabel>)> {
        self.taint_labels
            .iter()
            .filter(|(node, labels)| !self.sanitization_points.contains(*node) && !labels.is_empty())
            .collect()
    }

    /// Breadth-first search over unsanitized edges: is `sink_line` reachable
    /// from `source_line`? A line trivially reaches itself.
    pub fn has_tainted_path(&self, source_line: usize, sink_line: usize) -> bool {
        let mut seen: HashSet<usize> = HashSet::new();
        let mut frontier: VecDeque<usize> = VecDeque::from([source_line]);
        while let Some(line) = frontier.pop_front() {
            if line == sink_line {
                return true;
            }
            if seen.insert(line) {
                frontier.extend(
                    self.edges
                        .iter()
                        .filter(|e| e.edge.from_line == line && !e.sanitized)
                        .map(|e| e.edge.to_line),
                );
            }
        }
        false
    }
}
/// Tunable options for the propagation engine.
#[derive(Debug, Clone)]
pub struct PropagationConfig {
    /// Upper bound on worklist iterations in the DFG analysis.
    pub max_iterations: usize,
    /// Enables taint from branch conditions (implicit flows).
    pub track_implicit_flows: bool,
    /// When set, storing a tainted value taints the whole collection/object.
    pub conservative_collections: bool,
    /// When set, calls without a summary return taint if any argument is tainted.
    pub taint_through_calls: bool,
    /// Enables the inter-procedural pass.
    pub inter_procedural: bool,
    /// Maximum call depth (not consulted by the code visible in this part of the file).
    pub max_call_depth: usize,
    /// Minimum confidence a tainted value needs before a sink check reports it.
    pub min_confidence: f64,
}

impl Default for PropagationConfig {
    /// All tracking features on, 100 iterations, call depth 5, confidence 0.5.
    fn default() -> Self {
        PropagationConfig {
            // Feature switches.
            track_implicit_flows: true,
            conservative_collections: true,
            taint_through_calls: true,
            inter_procedural: true,
            // Limits and thresholds.
            max_iterations: 100,
            max_call_depth: 5,
            min_confidence: 0.5,
        }
    }
}
/// Summary of how a function treats taint, used instead of analysing its body.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FunctionTaintSummary {
    /// Function name; used as the lookup key when the summary is registered.
    pub name: String,
    /// Indices of parameters considered tainted.
    pub tainted_params: HashSet<usize>,
    /// Whether the return value may be tainted.
    pub return_tainted: bool,
    /// Indices of parameters whose taint reaches the return value.
    pub param_to_return: HashSet<usize>,
    /// Whether the function is a sanitizer.
    pub is_sanitizer: bool,
    /// Sanitization method name, for sanitizers.
    pub sanitization_method: Option<String>,
    /// Whether the function taints global state.
    pub taints_globals: bool,
    /// Names of globals the function taints.
    pub tainted_globals: HashSet<String>,
}

impl FunctionTaintSummary {
    /// Creates an empty summary for `name` (every other field at its default).
    pub fn new(name: impl Into<String>) -> Self {
        let mut summary = Self::default();
        summary.name = name.into();
        summary
    }

    /// Builder step: marks the function as a sanitizer using `method`.
    pub fn as_sanitizer(self, method: impl Into<String>) -> Self {
        Self {
            is_sanitizer: true,
            sanitization_method: Some(method.into()),
            ..self
        }
    }

    /// Builder step: records that parameter `param_idx` flows to the return
    /// value and marks the return as tainted.
    pub fn param_flows_to_return(mut self, param_idx: usize) -> Self {
        self.return_tainted = true;
        self.param_to_return.insert(param_idx);
        self
    }

    /// Builder step: records that parameters `0..param_count` all flow to the
    /// return value and marks the return as tainted.
    pub fn all_params_flow_to_return(mut self, param_count: usize) -> Self {
        self.return_tainted = true;
        self.param_to_return.extend(0..param_count);
        self
    }
}
/// Tracks taint that reaches code through branch conditions.
#[derive(Debug, Clone, Default)]
pub struct ImplicitFlowContext {
    /// One label set per branch currently entered, innermost last.
    pub condition_taint_stack: Vec<HashSet<TaintLabel>>,
    /// Blocks recorded as controlled by a tainted condition.
    pub tainted_blocks: HashSet<BlockId>,
    /// Union of the label sets on `condition_taint_stack`.
    pub current_implicit_taint: HashSet<TaintLabel>,
}

impl ImplicitFlowContext {
    /// Pushes the labels of a branch condition and folds them into the
    /// current implicit taint.
    pub fn enter_branch(&mut self, condition_taint: HashSet<TaintLabel>) {
        // Extending with an empty set is a no-op, so no emptiness check is needed.
        self.current_implicit_taint
            .extend(condition_taint.iter().cloned());
        self.condition_taint_stack.push(condition_taint);
    }

    /// Pops the innermost branch and recomputes the implicit taint from the
    /// branches still on the stack. Does nothing when the stack is empty.
    pub fn exit_branch(&mut self) {
        if self.condition_taint_stack.pop().is_none() {
            return;
        }
        self.current_implicit_taint = self
            .condition_taint_stack
            .iter()
            .flatten()
            .cloned()
            .collect();
    }

    /// True while at least one implicit label is active.
    pub fn has_implicit_taint(&self) -> bool {
        !self.current_implicit_taint.is_empty()
    }

    /// The labels currently implied by enclosing branch conditions.
    pub fn get_implicit_taint(&self) -> &HashSet<TaintLabel> {
        &self.current_implicit_taint
    }

    /// Records `block_id` as controlled by a tainted condition.
    pub fn mark_block_tainted(&mut self, block_id: BlockId) {
        self.tainted_blocks.insert(block_id);
    }

    /// Whether `block_id` was recorded via `mark_block_tainted`.
    pub fn is_block_tainted(&self, block_id: BlockId) -> bool {
        self.tainted_blocks.contains(&block_id)
    }
}
/// A worklist entry pairing a node with the node it was reached from.
///
/// NOTE(review): nothing in the visible part of this file constructs or reads
/// this type (the DFG analysis uses plain line numbers as worklist entries);
/// confirm whether it is still needed.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct WorklistItem {
// Node to process.
node: NodeId,
// Node this entry originated from (presumably the taint source — confirm).
source_node: NodeId,
// Depth counter; its exact meaning is not shown by any visible use.
depth: usize,
}
/// Engine that tracks tainted variables, applies propagation rules and
/// records taint flows that reach sinks.
pub struct PropagationEngine {
// Options controlling which propagation features are active.
config: PropagationConfig,
// Current taint state (variables, collections, properties).
state: TaintState,
// Flows recorded by sink checks since the last reset.
findings: Vec<TaintFlow>,
// Function summaries keyed by function name.
summaries: HashMap<String, FunctionTaintSummary>,
// Bookkeeping for implicit (branch-condition) flows.
implicit_context: ImplicitFlowContext,
}
impl PropagationEngine {
pub fn new() -> Self {
Self {
config: PropagationConfig::default(),
state: TaintState::new(),
findings: Vec::new(),
summaries: HashMap::new(),
implicit_context: ImplicitFlowContext::default(),
}
}
pub fn with_config(config: PropagationConfig) -> Self {
Self {
config,
state: TaintState::new(),
findings: Vec::new(),
summaries: HashMap::new(),
implicit_context: ImplicitFlowContext::default(),
}
}
pub fn reset(&mut self) {
self.state.clear();
self.findings.clear();
self.implicit_context = ImplicitFlowContext::default();
}
/// Flows recorded since the last reset.
pub fn findings(&self) -> &[TaintFlow] {
    self.findings.as_slice()
}
pub fn state(&self) -> &TaintState {
&self.state
}
/// Registers `summary` under its function name, replacing any summary
/// previously stored for that name.
pub fn register_summary(&mut self, summary: FunctionTaintSummary) {
    let key = summary.name.clone();
    self.summaries.insert(key, summary);
}
/// Marks `variable` as tainted with `label`, originating at `location`.
/// When `expression` is given it is attached as the source expression.
pub fn introduce_taint(
    &mut self,
    variable: &str,
    label: TaintLabel,
    location: Location,
    expression: Option<&str>,
) {
    let fresh = TaintedValue::new(label, location);
    let taint = match expression {
        Some(expr) => fresh.with_source_expression(expr),
        None => fresh,
    };
    self.state.set_variable(variable, taint);
}
/// Propagates taint for `target = source`.
///
/// Without active implicit taint, the source's taint (if any) is copied to
/// the target. With implicit-flow tracking on and a tainted branch condition
/// in effect, the implicit labels are added to the source's taint, or — when
/// the source is clean — the target receives the implicit labels alone with
/// confidence 0.7. If nothing applies the target is left untouched.
pub fn propagate_assignment(&mut self, target: &str, source: &str, location: Location) {
    let source_taint = self.state.get_variable(source).cloned();
    let implicit_active =
        self.config.track_implicit_flows && self.implicit_context.has_implicit_taint();

    let target_taint = if !implicit_active {
        source_taint.map(|taint| taint.propagate_copy(location))
    } else {
        // `implicit_active` guarantees this set is non-empty.
        let implicit_labels = self.implicit_context.get_implicit_taint().clone();
        let implicit_step = |at: Location| PropagationStep {
            location: at,
            propagation: TaintPropagation::ImplicitFlow,
            operation: Some("implicit flow from branch condition".to_string()),
        };
        match source_taint {
            Some(mut taint) => {
                taint.labels.extend(implicit_labels);
                taint.add_step(implicit_step(location));
                Some(taint)
            }
            None => {
                let mut taint = TaintedValue::with_labels(implicit_labels, location.clone());
                taint.add_step(implicit_step(location));
                taint.confidence = 0.7;
                Some(taint)
            }
        }
    };

    if let Some(taint) = target_taint {
        self.state.set_variable(target, taint);
    }
}
/// Propagates taint for `result = left <operator> right`.
///
/// Comparison and membership operators produce no taint. Otherwise the
/// result is the merge of both operands' taint, a copy of the single tainted
/// operand, or — when neither is tainted but an implicit flow is active —
/// the implicit labels with confidence 0.7.
pub fn propagate_binary_op(
    &mut self,
    result: &str,
    left: &str,
    right: &str,
    operator: &str,
    location: Location,
) {
    const NON_PROPAGATING: [&str; 9] = ["==", "!=", "<", ">", "<=", ">=", "is", "in", "not in"];
    if NON_PROPAGATING.contains(&operator) {
        return;
    }

    let lhs = self.state.get_variable(left);
    let rhs = self.state.get_variable(right);
    let result_taint = match (lhs, rhs) {
        (Some(l), Some(r)) => Some(TaintedValue::merge(&[l, r], location)),
        (Some(only), None) | (None, Some(only)) => Some(only.propagate_copy(location)),
        (None, None) => {
            let implicit_active =
                self.config.track_implicit_flows && self.implicit_context.has_implicit_taint();
            if implicit_active {
                // Non-empty because `has_implicit_taint` returned true.
                let implicit_labels = self.implicit_context.get_implicit_taint().clone();
                let mut taint = TaintedValue::with_labels(implicit_labels, location.clone());
                taint.add_step(PropagationStep {
                    location,
                    propagation: TaintPropagation::ImplicitFlow,
                    operation: Some("implicit flow in binary op".to_string()),
                });
                taint.confidence = 0.7;
                Some(taint)
            } else {
                None
            }
        }
    };

    if let Some(taint) = result_taint {
        self.state.set_variable(result, taint);
    }
}
pub fn propagate_string_concat(
&mut self,
result: &str,
parts: &[&str],
location: Location,
) {
let mut tainted_parts: Vec<&TaintedValue> = Vec::new();
for part in parts {
if let Some(t) = self.state.get_variable(part) {
if t.is_tainted() {
tainted_parts.push(t);
}
}
}
if !tainted_parts.is_empty() {
let merged = TaintedValue::merge(&tainted_parts, location.clone());
let transformed = merged.transform("string_concat", location);
self.state.set_variable(result, transformed);
}
}
/// Propagates taint through a type conversion: when `source` has taint,
/// `result` receives it transformed with operation `convert_to_<target_type>`.
pub fn propagate_type_conversion(
    &mut self,
    result: &str,
    source: &str,
    target_type: &str,
    location: Location,
) {
    let origin = match self.state.get_variable(source) {
        Some(taint) => taint,
        None => return,
    };
    let operation = format!("convert_to_{}", target_type);
    let converted = origin.transform(&operation, location);
    self.state.set_variable(result, converted);
}
/// Propagates taint through serialization: when `source` has taint, `result`
/// receives it transformed with operation `<format>_serialize`.
pub fn propagate_serialization(
    &mut self,
    result: &str,
    source: &str,
    format: &str,
    location: Location,
) {
    let origin = match self.state.get_variable(source) {
        Some(taint) => taint,
        None => return,
    };
    let operation = format!("{}_serialize", format);
    let serialized = origin.transform(&operation, location);
    self.state.set_variable(result, serialized);
}
/// Propagates taint for reading from a collection: when `collection` has
/// taint, `result` receives it via a collection-access step for `access_expr`.
pub fn propagate_collection_access(
    &mut self,
    result: &str,
    collection: &str,
    access_expr: &str,
    location: Location,
) {
    let container = match self.state.get_collection(collection) {
        Some(taint) => taint,
        None => return,
    };
    let element = container.collection_access(access_expr, location);
    self.state.set_variable(result, element);
}
/// Propagates taint for storing `value` into `collection`.
///
/// Only acts when `conservative_collections` is enabled and `value` has
/// taint: the collection then takes the value's taint plus a
/// `CollectionStore` step.
pub fn propagate_collection_store(
    &mut self,
    collection: &str,
    value: &str,
    location: Location,
) {
    if !self.config.conservative_collections {
        return;
    }
    let mut stored = match self.state.get_variable(value) {
        Some(taint) => taint.clone(),
        None => return,
    };
    stored.add_step(PropagationStep {
        location,
        propagation: TaintPropagation::CollectionStore,
        operation: Some(format!("store to {}", collection)),
    });
    self.state.set_collection(collection, stored);
}
/// Propagates taint for reading `object.property`: when that property has
/// taint, `result` receives it via a collection-access step for `.property`.
pub fn propagate_property_access(
    &mut self,
    result: &str,
    object: &str,
    property: &str,
    location: Location,
) {
    let field_taint = match self.state.get_property(object, property) {
        Some(taint) => taint.clone(),
        None => return,
    };
    let accessor = format!(".{}", property);
    let accessed = field_taint.collection_access(&accessor, location);
    self.state.set_variable(result, accessed);
}
/// Propagates taint for `object.property = value`.
///
/// When `value` has taint, the property takes it plus a `CollectionStore`
/// step. With `conservative_collections` enabled the object variable is
/// tainted as well, starting from the value's taint (without the property
/// step).
pub fn propagate_property_store(
    &mut self,
    object: &str,
    property: &str,
    value: &str,
    location: Location,
) {
    let source_taint = match self.state.get_variable(value).cloned() {
        Some(taint) => taint,
        None => return,
    };

    // Snapshot for the whole-object taint before the property step is added.
    let whole_object = if self.config.conservative_collections {
        Some(source_taint.clone())
    } else {
        None
    };

    let mut field_taint = source_taint;
    field_taint.add_step(PropagationStep {
        location: location.clone(),
        propagation: TaintPropagation::CollectionStore,
        operation: Some(format!("{}.{}", object, property)),
    });
    self.state.set_property(object, property, field_taint);

    if let Some(mut object_taint) = whole_object {
        object_taint.add_step(PropagationStep {
            location,
            propagation: TaintPropagation::CollectionStore,
            operation: Some(format!("property store to {}", object)),
        });
        self.state.set_variable(object, object_taint);
    }
}
pub fn propagate_call_return(
&mut self,
result: &str,
function: &str,
arguments: &[&str],
location: Location,
) {
if let Some(summary) = self.summaries.get(function) {
if summary.is_sanitizer {
if let Some(method) = &summary.sanitization_method {
for arg in arguments {
if let Some(t) = self.state.get_variable(arg) {
if t.is_tainted() {
let sanitized = t.sanitize(method, location.clone());
self.state.set_variable(result, sanitized);
return;
}
}
}
}
return;
}
if !summary.param_to_return.is_empty() {
let mut tainted_args: Vec<&TaintedValue> = Vec::new();
for ¶m_idx in &summary.param_to_return {
if let Some(arg) = arguments.get(param_idx) {
if let Some(t) = self.state.get_variable(arg) {
if t.is_tainted() {
tainted_args.push(t);
}
}
}
}
if !tainted_args.is_empty() {
let merged = TaintedValue::merge(&tainted_args, location.clone());
let returned = merged.call_return(function, location);
self.state.set_variable(result, returned);
}
return;
}
}
if !self.config.taint_through_calls {
return;
}
let mut tainted_args: Vec<&TaintedValue> = Vec::new();
for arg in arguments {
if let Some(arg_taint) = self.state.get_variable(arg) {
if arg_taint.is_tainted() {
tainted_args.push(arg_taint);
}
}
}
if !tainted_args.is_empty() {
let merged = TaintedValue::merge(&tainted_args, location.clone());
let returned = merged.call_return(function, location);
self.state.set_variable(result, returned);
}
}
/// Replaces the taint of `variable` with its sanitized form (using
/// `method`). Does nothing when the variable has no taint entry.
pub fn sanitize(&mut self, variable: &str, method: &str, location: Location) {
    let current = match self.state.get_variable(variable) {
        Some(taint) => taint.clone(),
        None => return,
    };
    let cleaned = current.sanitize(method, location);
    self.state.set_variable(variable, cleaned);
}
pub fn enter_branch(&mut self, condition_var: &str) {
if self.config.track_implicit_flows {
let condition_taint = self
.state
.get_variable(condition_var)
.map(|t| t.labels.clone())
.unwrap_or_default();
self.implicit_context.enter_branch(condition_taint);
}
}
/// Leaves the innermost branch. No-op when implicit-flow tracking is disabled.
pub fn exit_branch(&mut self) {
    if !self.config.track_implicit_flows {
        return;
    }
    self.implicit_context.exit_branch();
}
/// Records a finding when `variable` reaches a sink of `sink_type` at
/// `location` while tainted with confidence at or above `min_confidence`.
pub fn check_sink(&mut self, variable: &str, sink_type: &str, location: Location) {
    let taint = match self.state.get_variable(variable) {
        Some(taint) => taint,
        None => return,
    };
    let reportable = taint.is_tainted() && taint.confidence >= self.config.min_confidence;
    if !reportable {
        return;
    }
    let flow = TaintFlow {
        source: taint.source_location.clone(),
        sink: location,
        labels: taint.labels.clone(),
        path: taint.propagation_path.clone(),
        sink_type: sink_type.to_string(),
        variable: variable.to_string(),
        confidence: taint.confidence,
        source_expression: taint.source_expression.clone(),
    };
    self.findings.push(flow);
}
/// Runs intra-procedural taint propagation over `dfg` and returns the flows
/// that reach a registered sink line.
///
/// Steps:
/// 1. Reset the engine.
/// 2. For every source line, taint each variable defined on that line
///    (edges of kind `Definition` starting there) and mark its node.
/// 3. Worklist propagation: follow unsanitized edges whose variable is
///    tainted, marking each `(line, variable)` node the first time it is
///    reached. At most `max_iterations` lines are processed.
/// 4. For every sink line, check each variable flowing into it once.
///
/// Source and sink lines are visited in ascending order so results do not
/// depend on hash-map iteration order.
///
/// Termination no longer relies on "was the variable already tainted?": an
/// edge's source and target share one variable name, so that test was always
/// true and propagation stopped at the source lines. Reached nodes are
/// tracked explicitly instead.
pub fn analyze_dfg(&mut self, dfg: &mut TaintedDFG) -> Vec<TaintFlow> {
    self.reset();
    let source_lines = dfg.source_lines.clone();
    let function_name = dfg.function_name.clone();
    let file_path = dfg.file_path.clone();

    // Column-oriented copy of the edge list, as expected by the line matcher.
    let edge_count = dfg.edges.len();
    let mut from_lines: Vec<u32> = Vec::with_capacity(edge_count);
    let mut to_lines: Vec<usize> = Vec::with_capacity(edge_count);
    let mut variables: Vec<String> = Vec::with_capacity(edge_count);
    let mut kinds: Vec<DataflowKind> = Vec::with_capacity(edge_count);
    let mut sanitized: Vec<bool> = Vec::with_capacity(edge_count);
    for e in &dfg.edges {
        // NOTE(review): narrowing cast kept because the matcher operates on
        // u32; line numbers above u32::MAX would wrap.
        from_lines.push(e.edge.from_line as u32);
        to_lines.push(e.edge.to_line);
        variables.push(e.edge.variable.clone());
        kinds.push(e.edge.kind);
        sanitized.push(e.sanitized);
    }

    let mut ordered_sources: Vec<usize> = source_lines.keys().copied().collect();
    ordered_sources.sort_unstable();

    // Scratch buffer filled by the matcher on each call (presumably its
    // previous contents are replaced — confirm against `crate::simd`).
    let mut matching_indices: Vec<usize> = Vec::new();
    // Every (line, variable) node already marked as tainted.
    let mut reached: HashSet<NodeId> = HashSet::new();

    // Seed: taint variables defined on source lines.
    for &line in &ordered_sources {
        let labels = &source_lines[&line];
        let location = Location::new(&function_name, line, 1);
        crate::simd::find_matching_u32_into(&from_lines, line as u32, &mut matching_indices);
        for label in labels {
            for &idx in &matching_indices {
                if matches!(kinds[idx], DataflowKind::Definition) {
                    self.introduce_taint(&variables[idx], label.clone(), location.clone(), None);
                    let node = NodeId::with_variable(&file_path, line, &variables[idx]);
                    reached.insert(node.clone());
                    dfg.mark_tainted(node, std::iter::once(label.clone()));
                }
            }
        }
    }

    let mut worklist: VecDeque<usize> = VecDeque::new();
    let mut in_worklist: HashSet<usize> = HashSet::new();
    for &line in &ordered_sources {
        if in_worklist.insert(line) {
            worklist.push_back(line);
        }
    }

    let mut nodes_to_mark: Vec<(NodeId, HashSet<TaintLabel>)> = Vec::new();
    let mut iterations = 0;
    while let Some(current_line) = worklist.pop_front() {
        in_worklist.remove(&current_line);
        if iterations >= self.config.max_iterations {
            break;
        }
        iterations += 1;

        crate::simd::find_matching_u32_into(&from_lines, current_line as u32, &mut matching_indices);
        for &idx in &matching_indices {
            if sanitized[idx] {
                continue;
            }
            let variable = &variables[idx];
            if !self.state.is_variable_tainted(variable) {
                continue;
            }
            let target_line = to_lines[idx];
            let node = NodeId::with_variable(&file_path, target_line, variable);
            // Each node is propagated to once; this bounds the worklist.
            if !reached.insert(node.clone()) {
                continue;
            }
            let location = Location::new(&function_name, target_line, 1);
            self.propagate_assignment(variable, variable, location);
            if let Some(labels) = self.state.get_variable(variable).map(|t| t.labels.clone()) {
                nodes_to_mark.push((node, labels));
            }
            if in_worklist.insert(target_line) {
                worklist.push_back(target_line);
            }
        }
    }
    for (node, labels) in nodes_to_mark {
        dfg.mark_tainted(node, labels);
    }

    let mut sink_lines: Vec<usize> = dfg.sink_lines.iter().copied().collect();
    sink_lines.sort_unstable();
    for sink_line in sink_lines {
        let location = Location::new(&function_name, sink_line, 1);
        // Several edges may carry the same variable into one sink line;
        // report each variable once.
        let mut checked: HashSet<&str> = HashSet::new();
        for (idx, &to_line) in to_lines.iter().enumerate() {
            if to_line == sink_line && checked.insert(variables[idx].as_str()) {
                self.check_sink(&variables[idx], "generic_sink", location.clone());
            }
        }
    }
    self.findings.clone()
}
pub fn analyze_inter_procedural(
&mut self,
dfg: &mut TaintedDFG,
cfg: Option<&CFGInfo>,
call_graph: Option<&CallGraph>,
function_dfgs: &HashMap<String, DFGInfo>,
) -> Vec<TaintFlow> {
let mut findings = self.analyze_dfg(dfg);
if !self.config.inter_procedural || call_graph.is_none() {
return findings;
}
let call_graph = call_graph.unwrap();
let current_func = FunctionRef {
file: dfg.file_path.clone(),
name: dfg.function_name.clone(),
qualified_name: None,
};
let callees = call_graph.get_callees(¤t_func, 1);
for callee in callees {
if let Some(_callee_dfg) = function_dfgs.get(&callee.name) {
for edge in &dfg.edges {
if edge.edge.kind == DataflowKind::Return {
let call_line = edge.edge.from_line;
for other_edge in &dfg.edges {
if other_edge.edge.to_line == call_line {
if self.state.is_variable_tainted(&other_edge.edge.variable) {
let location = Location::new(&dfg.function_name, call_line, 1);
self.check_sink(
&other_edge.edge.variable,
&format!("call_to_{}", callee.name),
location,
);
}
}
}
}
}
}
}
if self.config.track_implicit_flows {
if let Some(cfg) = cfg {
self.analyze_implicit_flows(dfg, cfg);
}
}
findings.append(&mut self.findings.clone());
findings
}
fn analyze_implicit_flows(&mut self, dfg: &mut TaintedDFG, cfg: &CFGInfo) {
let edges_snapshot: Vec<(usize, String, DataflowKind)> = dfg
.edges
.iter()
.map(|e| (e.edge.from_line, e.edge.variable.clone(), e.edge.kind))
.collect();
let file_path = dfg.file_path.clone();
for block in cfg.blocks.values() {
if block.block_type == crate::cfg::types::BlockType::Branch {
for line in block.start_line..=block.end_line {
let node = NodeId::from_line(line);
if dfg.is_tainted(&node) {
for edge in &cfg.edges {
if edge.from == block.id {
match edge.edge_type {
EdgeType::True | EdgeType::False => {
self.implicit_context.mark_block_tainted(edge.to);
}
_ => {}
}
}
}
}
}
}
}
let mut nodes_to_mark: Vec<(NodeId, HashSet<TaintLabel>)> = Vec::new();
for block in cfg.blocks.values() {
if self.implicit_context.is_block_tainted(block.id) {
let implicit_labels = self.implicit_context.get_implicit_taint().clone();
if !implicit_labels.is_empty() {
for line in block.start_line..=block.end_line {
for (from_line, variable, kind) in &edges_snapshot {
if *from_line == line && matches!(kind, DataflowKind::Definition) {
let node = NodeId::with_variable(&file_path, line, variable);
nodes_to_mark.push((node, implicit_labels.clone()));
}
}
}
}
}
}
for (node, labels) in nodes_to_mark {
dfg.mark_tainted(node, labels);
}
}
/// Merges the taint state of `other` into this engine's state at `location`.
/// Findings, summaries and configuration are not merged.
pub fn merge_state(&mut self, other: &PropagationEngine, location: &Location) {
    let incoming = &other.state;
    self.state.merge(incoming, location);
}
}
impl Default for PropagationEngine {
fn default() -> Self {
Self::new()
}
}
/// A detected flow of tainted data from a source to a sink.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TaintFlow {
    /// Where the taint originated.
    pub source: Location,
    /// Where the tainted value reached a sink.
    pub sink: Location,
    /// Labels carried by the value at the sink.
    pub labels: HashSet<TaintLabel>,
    /// Propagation steps recorded between source and sink.
    pub path: Vec<PropagationStep>,
    /// Kind of sink that was reached.
    pub sink_type: String,
    /// Variable that carried the taint into the sink.
    pub variable: String,
    /// Confidence of the finding.
    pub confidence: f64,
    /// Source expression, when one was recorded.
    pub source_expression: Option<String>,
}

impl TaintFlow {
    /// Highest severity weight among the flow's labels, or 0 without labels.
    pub fn max_severity(&self) -> u8 {
        self.labels
            .iter()
            .map(TaintLabel::severity_weight)
            .max()
            .unwrap_or(0)
    }

    /// One-line human-readable summary of the flow.
    ///
    /// Labels are listed in sorted order so the text is identical across
    /// runs; hash-set iteration order is arbitrary.
    pub fn description(&self) -> String {
        let mut labels: Vec<_> = self.labels.iter().map(|l| l.to_string()).collect();
        labels.sort_unstable();
        format!(
            "Taint flow from {} to {} via {} (labels: {})",
            self.source,
            self.sink,
            self.variable,
            labels.join(", ")
        )
    }

    /// True when any step of the path is an implicit flow.
    pub fn has_implicit_flow(&self) -> bool {
        self.path
            .iter()
            .any(|step| step.propagation == TaintPropagation::ImplicitFlow)
    }
}
/// Declarative description of how a named function treats taint.
#[derive(Debug, Clone)]
pub struct PropagationRule {
    /// Function name the rule applies to.
    pub name: String,
    /// Argument positions whose taint reaches the return value.
    pub arg_to_return: Vec<usize>,
    /// Whether the function is a sanitizer.
    pub is_sanitizer: bool,
    /// Sanitization method name, for sanitizers.
    pub sanitization_method: Option<String>,
    /// Whether every argument's taint reaches the return value.
    pub propagate_all: bool,
}

impl PropagationRule {
    /// Rule for `name` with no flows and no sanitization; the public
    /// constructors override the fields they care about.
    fn blank(name: impl Into<String>) -> Self {
        PropagationRule {
            name: name.into(),
            arg_to_return: Vec::new(),
            is_sanitizer: false,
            sanitization_method: None,
            propagate_all: false,
        }
    }

    /// Rule under which every argument flows to the return value.
    pub fn propagate_all(name: impl Into<String>) -> Self {
        PropagationRule {
            propagate_all: true,
            ..Self::blank(name)
        }
    }

    /// Rule under which only the arguments at positions `args` flow to the
    /// return value.
    pub fn propagate_args(name: impl Into<String>, args: Vec<usize>) -> Self {
        PropagationRule {
            arg_to_return: args,
            ..Self::blank(name)
        }
    }

    /// Rule marking `name` as a sanitizer that applies `method`.
    pub fn sanitizer(name: impl Into<String>, method: impl Into<String>) -> Self {
        PropagationRule {
            is_sanitizer: true,
            sanitization_method: Some(method.into()),
            ..Self::blank(name)
        }
    }
}
#[derive(Debug, Default)]
pub struct PropagationRules {
rules: HashMap<String, PropagationRule>,
}
impl PropagationRules {
pub fn new() -> Self {
Self::default()
}
pub fn add_rule(&mut self, rule: PropagationRule) {
self.rules.insert(rule.name.clone(), rule);
}
pub fn get_rule(&self, name: &str) -> Option<&PropagationRule> {
self.rules.get(name)
}
pub fn is_sanitizer(&self, name: &str) -> bool {
self.rules.get(name).map_or(false, |r| r.is_sanitizer)
}
pub fn python_defaults() -> Self {
let mut rules = Self::new();
rules.add_rule(PropagationRule::propagate_args("str.upper", vec![0]));
rules.add_rule(PropagationRule::propagate_args("str.lower", vec![0]));
rules.add_rule(PropagationRule::propagate_args("str.strip", vec![0]));
rules.add_rule(PropagationRule::propagate_args("str.format", vec![0]));
rules.add_rule(PropagationRule::propagate_all("str.join"));
rules.add_rule(PropagationRule::propagate_all("+"));
rules.add_rule(PropagationRule::propagate_args("json.dumps", vec![0]));
rules.add_rule(PropagationRule::propagate_args("json.loads", vec![0]));
rules.add_rule(PropagationRule::propagate_args("str", vec![0]));
rules.add_rule(PropagationRule::propagate_args("int", vec![0]));
rules.add_rule(PropagationRule::propagate_args("float", vec![0]));
rules.add_rule(PropagationRule::sanitizer("html.escape", "html_escape"));
rules.add_rule(PropagationRule::sanitizer("markupsafe.escape", "markup_escape"));
rules.add_rule(PropagationRule::sanitizer("bleach.clean", "bleach_clean"));
rules.add_rule(PropagationRule::sanitizer("shlex.quote", "shell_escape"));
rules.add_rule(PropagationRule::sanitizer("urllib.parse.quote", "url_encode"));
rules.add_rule(PropagationRule::sanitizer("cgi.escape", "cgi_escape"));
rules
}
pub fn typescript_defaults() -> Self {
let mut rules = Self::new();
rules.add_rule(PropagationRule::propagate_args("String.prototype.toUpperCase", vec![0]));
rules.add_rule(PropagationRule::propagate_args("String.prototype.toLowerCase", vec![0]));
rules.add_rule(PropagationRule::propagate_args("String.prototype.trim", vec![0]));
rules.add_rule(PropagationRule::propagate_all("String.prototype.concat"));
rules.add_rule(PropagationRule::propagate_all("+"));
rules.add_rule(PropagationRule::propagate_args("JSON.stringify", vec![0]));
rules.add_rule(PropagationRule::propagate_args("JSON.parse", vec![0]));
rules.add_rule(PropagationRule::propagate_args("String", vec![0]));
rules.add_rule(PropagationRule::propagate_args("Number", vec![0]));
rules.add_rule(PropagationRule::propagate_args("toString", vec![0]));
rules.add_rule(PropagationRule::sanitizer("DOMPurify.sanitize", "dom_purify"));
rules.add_rule(PropagationRule::sanitizer("escape", "html_escape"));
rules.add_rule(PropagationRule::sanitizer("encodeURIComponent", "url_encode"));
rules.add_rule(PropagationRule::sanitizer("validator.escape", "validator_escape"));
rules.add_rule(PropagationRule::sanitizer("xss", "xss_filter"));
rules
}
pub fn go_defaults() -> Self {
let mut rules = Self::new();
rules.add_rule(PropagationRule::propagate_all("strings.Join"));
rules.add_rule(PropagationRule::propagate_args("strings.ToUpper", vec![0]));
rules.add_rule(PropagationRule::propagate_args("strings.ToLower", vec![0]));
rules.add_rule(PropagationRule::propagate_args("strings.TrimSpace", vec![0]));
rules.add_rule(PropagationRule::propagate_args("fmt.Sprintf", vec![0, 1, 2, 3, 4]));
rules.add_rule(PropagationRule::propagate_args("json.Marshal", vec![0]));
rules.add_rule(PropagationRule::propagate_args("json.Unmarshal", vec![0]));
rules.add_rule(PropagationRule::propagate_args("strconv.Itoa", vec![0]));
rules.add_rule(PropagationRule::propagate_args("strconv.Atoi", vec![0]));
rules.add_rule(PropagationRule::sanitizer("html.EscapeString", "html_escape"));
rules.add_rule(PropagationRule::sanitizer("url.QueryEscape", "url_encode"));
rules.add_rule(PropagationRule::sanitizer("template.HTMLEscapeString", "html_escape"));
rules
}
pub fn rust_defaults() -> Self {
let mut rules = Self::new();
rules.add_rule(PropagationRule::propagate_args("to_uppercase", vec![0]));
rules.add_rule(PropagationRule::propagate_args("to_lowercase", vec![0]));
rules.add_rule(PropagationRule::propagate_args("trim", vec![0]));
rules.add_rule(PropagationRule::propagate_all("format!"));
rules.add_rule(PropagationRule::propagate_args("to_string", vec![0]));
rules.add_rule(PropagationRule::propagate_args("serde_json::to_string", vec![0]));
rules.add_rule(PropagationRule::propagate_args("serde_json::from_str", vec![0]));
rules.add_rule(PropagationRule::sanitizer("html_escape", "html_escape"));
rules.add_rule(PropagationRule::sanitizer("encode_minimal", "html_escape"));
rules.add_rule(PropagationRule::sanitizer("urlencoding::encode", "url_encode"));
rules
}
pub fn to_summaries(&self) -> Vec<FunctionTaintSummary> {
self.rules
.values()
.map(|rule| {
let mut summary = FunctionTaintSummary::new(&rule.name);
if rule.is_sanitizer {
summary.is_sanitizer = true;
summary.sanitization_method = rule.sanitization_method.clone();
} else if rule.propagate_all {
for i in 0..10 {
summary.param_to_return.insert(i);
}
summary.return_tainted = true;
} else {
for &idx in &rule.arg_to_return {
summary.param_to_return.insert(idx);
}
summary.return_tainted = !rule.arg_to_return.is_empty();
}
summary
})
.collect()
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TaintTraceResult {
pub file: String,
pub start_line: usize,
pub tainted_node_count: usize,
pub tainted_lines: HashMap<usize, Vec<String>>,
pub flows: Vec<TaintFlow>,
pub implicit_flow_tracking: bool,
pub sanitization_points: Vec<usize>,
}
impl TaintTraceResult {
    /// Creates an empty result for `file`, anchored at `start_line`.
    ///
    /// All counters start at zero, all collections start empty, and
    /// implicit-flow tracking is reported as disabled.
    pub fn new(file: impl Into<String>, start_line: usize) -> Self {
        let file = file.into();
        Self {
            file,
            start_line,
            tainted_node_count: 0,
            tainted_lines: HashMap::default(),
            flows: vec![],
            implicit_flow_tracking: false,
            sanitization_points: vec![],
        }
    }

    /// Renders the result as a JSON value.
    ///
    /// In addition to the struct's own fields the object carries a
    /// `flow_count` entry. Each flow is reduced to its source, sink, labels,
    /// sink type, variable, confidence and implicit-flow flag; the flow's
    /// `path` and `source_expression` are not included.
    pub fn to_json(&self) -> serde_json::Value {
        // Build the per-flow objects first so the top-level object stays flat.
        let mut flow_entries = Vec::with_capacity(self.flows.len());
        for flow in &self.flows {
            let label_names: Vec<String> =
                flow.labels.iter().map(|label| label.to_string()).collect();
            flow_entries.push(serde_json::json!({
                "source": flow.source.to_string(),
                "sink": flow.sink.to_string(),
                "labels": label_names,
                "sink_type": flow.sink_type,
                "variable": flow.variable,
                "confidence": flow.confidence,
                "has_implicit_flow": flow.has_implicit_flow(),
            }));
        }

        let flow_count = self.flows.len();
        serde_json::json!({
            "file": self.file,
            "start_line": self.start_line,
            "tainted_node_count": self.tainted_node_count,
            "tainted_lines": self.tainted_lines,
            "flow_count": flow_count,
            "flows": flow_entries,
            "implicit_flow_tracking": self.implicit_flow_tracking,
            "sanitization_points": self.sanitization_points,
        })
    }
}
/// Traces taint forward from one source line of a single function.
///
/// Extracts the data-flow graph of `function_name` in `file_path`, marks
/// `source_line` as a [`TaintLabel::UserInput`] source, registers the
/// built-in propagation rules for `language`, and runs the propagation
/// engine over the graph.
///
/// # Arguments
///
/// * `file_path` - file to analyze; also recorded in the result.
/// * `source_line` - line to mark as the taint source.
/// * `function_name` - function whose data-flow graph is extracted.
/// * `language` - selects the default rule set: `"python"`,
///   `"typescript"` / `"javascript"`, `"go"` or `"rust"`. Any other value
///   runs the engine with an empty rule set.
/// * `config` - engine configuration; `None` uses the default configuration.
///
/// # Returns
///
/// A [`TaintTraceResult`] whose per-line label lists and sanitization points
/// are sorted and free of duplicates.
///
/// # Errors
///
/// Returns `Err("Failed to extract DFG: …")` when the data-flow graph cannot
/// be extracted.
pub fn trace_taint_from_line(
    file_path: &str,
    source_line: usize,
    function_name: &str,
    language: &str,
    config: Option<PropagationConfig>,
) -> Result<TaintTraceResult, String> {
    let dfg = crate::dfg::DfgBuilder::extract_from_file_with_language(
        file_path,
        function_name,
        Some(language),
    )
    .map_err(|e| format!("Failed to extract DFG: {}", e))?;

    let config = config.unwrap_or_default();
    // Read the only flag needed later before the config is moved into the
    // engine, so the whole config does not have to be cloned.
    let track_implicit_flows = config.track_implicit_flows;
    let mut engine = PropagationEngine::with_config(config);

    let rules = match language {
        "python" => PropagationRules::python_defaults(),
        "typescript" | "javascript" => PropagationRules::typescript_defaults(),
        "go" => PropagationRules::go_defaults(),
        "rust" => PropagationRules::rust_defaults(),
        // Unrecognized language: analyze without built-in rules.
        _ => PropagationRules::new(),
    };
    for summary in rules.to_summaries() {
        engine.register_summary(summary);
    }

    let mut tainted_dfg = TaintedDFG::from_dfg(&dfg, file_path);
    tainted_dfg.mark_source(source_line, TaintLabel::UserInput);
    let flows = engine.analyze_dfg(&mut tainted_dfg);

    let mut result = TaintTraceResult::new(file_path, source_line);
    result.implicit_flow_tracking = track_implicit_flows;

    for (node, labels) in tainted_dfg.tainted_nodes() {
        result
            .tainted_lines
            .entry(node.line)
            .or_default()
            .extend(labels.iter().map(|l| l.to_string()));
    }
    // Several nodes can share a line and carry the same label; normalize each
    // list so the output has no duplicates and does not depend on the
    // iteration order of the label collections.
    for line_labels in result.tainted_lines.values_mut() {
        line_labels.sort_unstable();
        line_labels.dedup();
    }
    // NOTE(review): this is the number of distinct tainted lines, not of
    // tainted nodes; kept as-is because callers may rely on it — confirm.
    result.tainted_node_count = result.tainted_lines.len();

    result
        .sanitization_points
        .extend(tainted_dfg.sanitization_points.iter().map(|node| node.line));
    result.sanitization_points.sort_unstable();
    result.sanitization_points.dedup();

    result.flows = flows;
    Ok(result)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a location in the fixture file `test.py` at `line`, with `1`
    /// as the third `Location::new` argument.
    fn loc(line: usize) -> Location {
        Location::new("test.py", line, 1)
    }

    /// The three `NodeId` constructors populate exactly the fields they are given.
    #[test]
    fn test_node_id() {
        let node1 = NodeId::from_line(42);
        assert_eq!(node1.line, 42);
        assert!(node1.file.is_empty());

        let node2 = NodeId::new("test.py", 10);
        assert_eq!(node2.file, "test.py");
        assert_eq!(node2.line, 10);

        let node3 = NodeId::with_variable("test.py", 10, "x");
        assert_eq!(node3.variable, Some("x".to_string()));
    }

    /// Assigning from a tainted variable taints the target.
    #[test]
    fn test_propagation_engine_basic() {
        let mut engine = PropagationEngine::new();
        engine.introduce_taint("user_input", TaintLabel::UserInput, loc(1), Some("input()"));
        engine.propagate_assignment("x", "user_input", loc(2));
        assert!(engine.state().is_variable_tainted("user_input"));
        assert!(engine.state().is_variable_tainted("x"));
    }

    /// The result of an `==` comparison is not tainted.
    #[test]
    fn test_comparison_does_not_propagate() {
        let mut engine = PropagationEngine::new();
        engine.introduce_taint("tainted", TaintLabel::UserInput, loc(1), None);
        engine.propagate_binary_op("result", "tainted", "safe", "==", loc(2));
        assert!(!engine.state().is_variable_tainted("result"));
    }

    /// Concatenation is tainted when any operand is tainted.
    #[test]
    fn test_string_concat_propagates() {
        let mut engine = PropagationEngine::new();
        engine.introduce_taint("tainted", TaintLabel::UserInput, loc(1), None);
        engine.propagate_string_concat("result", &["safe", "tainted"], loc(2));
        assert!(engine.state().is_variable_tainted("result"));
    }

    /// A type conversion keeps the taint of its input.
    #[test]
    fn test_type_conversion_propagates() {
        let mut engine = PropagationEngine::new();
        engine.introduce_taint("tainted", TaintLabel::UserInput, loc(1), None);
        engine.propagate_type_conversion("result", "tainted", "int", loc(2));
        assert!(engine.state().is_variable_tainted("result"));
    }

    /// Serialization keeps the taint of its input.
    #[test]
    fn test_serialization_propagates() {
        let mut engine = PropagationEngine::new();
        engine.introduce_taint("tainted", TaintLabel::UserInput, loc(1), None);
        engine.propagate_serialization("result", "tainted", "json", loc(2));
        assert!(engine.state().is_variable_tainted("result"));
    }

    /// A `+` of two tainted operands carries the labels of both.
    #[test]
    fn test_propagation_binary_op() {
        let mut engine = PropagationEngine::new();
        engine.introduce_taint("a", TaintLabel::UserInput, loc(1), None);
        engine.introduce_taint("b", TaintLabel::FileContent, loc(2), None);
        engine.propagate_binary_op("c", "a", "b", "+", loc(3));

        let c_taint = engine.state().get_variable("c").unwrap();
        assert!(c_taint.has_label(&TaintLabel::UserInput));
        assert!(c_taint.has_label(&TaintLabel::FileContent));
    }

    /// Sanitizing a tainted variable clears its taint.
    #[test]
    fn test_sanitization() {
        let mut engine = PropagationEngine::new();
        engine.introduce_taint("x", TaintLabel::UserInput, loc(1), None);
        assert!(engine.state().is_variable_tainted("x"));
        engine.sanitize("x", "html_escape", loc(2));
        assert!(!engine.state().is_variable_tainted("x"));
    }

    /// Taint stored into a collection is visible on elements read back from it.
    #[test]
    fn test_collection_propagation() {
        let mut engine = PropagationEngine::new();
        engine.introduce_taint("value", TaintLabel::UserInput, loc(1), None);
        engine.propagate_collection_store("arr", "value", loc(2));
        engine.propagate_collection_access("elem", "arr", "[0]", loc(3));
        assert!(engine.state().is_variable_tainted("elem"));
    }

    /// Smoke test: entering and leaving a branch on a tainted condition with
    /// implicit-flow tracking enabled must not panic.
    ///
    /// NOTE(review): this test makes no assertion about the taint of `x`;
    /// add one once the expected implicit-flow result is pinned down.
    #[test]
    fn test_implicit_flow_tracking() {
        let mut engine = PropagationEngine::with_config(PropagationConfig {
            track_implicit_flows: true,
            ..Default::default()
        });
        engine.introduce_taint("condition", TaintLabel::UserInput, loc(1), None);
        engine.enter_branch("condition");
        engine.propagate_assignment("x", "safe_value", loc(5));
        engine.exit_branch();
    }

    /// `as_sanitizer` flags the summary and records the sanitization method.
    #[test]
    fn test_function_summary() {
        let summary = FunctionTaintSummary::new("html_escape").as_sanitizer("html_escape");
        assert!(summary.is_sanitizer);
        assert_eq!(summary.sanitization_method, Some("html_escape".to_string()));
    }

    /// The Python defaults know `html.escape` as a sanitizer but not `str.upper`.
    #[test]
    fn test_propagation_rules() {
        let rules = PropagationRules::python_defaults();
        assert!(rules.is_sanitizer("html.escape"));
        assert!(!rules.is_sanitizer("str.upper"));

        let escape_rule = rules.get_rule("html.escape").unwrap();
        assert!(escape_rule.is_sanitizer);
        assert_eq!(
            escape_rule.sanitization_method,
            Some("html_escape".to_string())
        );
    }

    /// A flow description mentions the source, the sink and the variable.
    #[test]
    fn test_taint_flow_description() {
        let flow = TaintFlow {
            source: Location::new("input.py", 5, 1),
            sink: Location::new("query.py", 20, 1),
            labels: [TaintLabel::UserInput].into_iter().collect(),
            path: vec![],
            sink_type: "sql_query".to_string(),
            variable: "user_data".to_string(),
            confidence: 0.95,
            source_expression: Some("request.get('id')".to_string()),
        };
        let desc = flow.description();
        assert!(desc.contains("input.py:5:1"));
        assert!(desc.contains("query.py:20:1"));
        assert!(desc.contains("user_data"));
    }

    /// Marking a source line on an empty DFG records it and taints its node.
    #[test]
    fn test_tainted_dfg_creation() {
        let dfg = DFGInfo::new(
            "test_func".to_string(),
            vec![],
            HashMap::new(),
            HashMap::new(),
        );
        let mut tainted = TaintedDFG::from_dfg(&dfg, "test.py");
        tainted.mark_source(10, TaintLabel::UserInput);
        assert!(!tainted.source_lines.is_empty());
        assert!(tainted.source_lines.contains_key(&10));

        let node = NodeId::from_line(10);
        assert!(tainted.is_tainted(&node));
    }

    /// `to_json` echoes the file, start line and node count verbatim.
    #[test]
    fn test_taint_trace_result_json() {
        let mut result = TaintTraceResult::new("test.py", 10);
        result.tainted_node_count = 5;
        result.tainted_lines.insert(10, vec!["UserInput".to_string()]);
        result.tainted_lines.insert(11, vec!["UserInput".to_string()]);

        let json = result.to_json();
        assert_eq!(json["file"], "test.py");
        assert_eq!(json["start_line"], 10);
        assert_eq!(json["tainted_node_count"], 5);
    }

    /// A call to a registered sanitizer yields an untainted return value.
    #[test]
    fn test_call_with_sanitizer_summary() {
        let mut engine = PropagationEngine::new();
        engine.register_summary(
            FunctionTaintSummary::new("html_escape").as_sanitizer("html_escape"),
        );
        engine.introduce_taint("tainted_input", TaintLabel::UserInput, loc(1), None);
        engine.propagate_call_return("result", "html_escape", &["tainted_input"], loc(2));
        assert!(!engine.state().is_variable_tainted("result"));
    }

    /// A summary mapping parameter 0 to the return value propagates taint
    /// through the call.
    #[test]
    fn test_call_with_propagating_summary() {
        let mut engine = PropagationEngine::new();
        let mut summary = FunctionTaintSummary::new("str.upper");
        summary.param_to_return.insert(0);
        summary.return_tainted = true;
        engine.register_summary(summary);

        engine.introduce_taint("tainted_input", TaintLabel::UserInput, loc(1), None);
        engine.propagate_call_return("result", "str.upper", &["tainted_input"], loc(2));
        assert!(engine.state().is_variable_tainted("result"));
    }
}