circomspect_program_analysis/
taint_analysis.rs

1use log::{debug, trace};
2use program_structure::cfg::parameters::Parameters;
3use program_structure::intermediate_representation::value_meta::ValueMeta;
4use program_structure::intermediate_representation::Meta;
5use std::collections::{HashMap, HashSet};
6
7use program_structure::cfg::Cfg;
8use program_structure::ir::variable_meta::{VariableMeta, VariableUse};
9use program_structure::ir::{Expression, Statement, VariableName};
10
11#[derive(Clone, Default)]
12pub struct TaintAnalysis {
13    taint_map: HashMap<VariableName, HashSet<VariableName>>,
14    declarations: HashMap<VariableName, VariableUse>,
15    definitions: HashMap<VariableName, VariableUse>,
16}
17
18impl TaintAnalysis {
19    fn new(parameters: &Parameters) -> TaintAnalysis {
20        // Add parameter definitions to taint analysis.
21        let mut result = TaintAnalysis::default();
22        let meta = Meta::new(parameters.file_location(), parameters.file_id());
23        for name in parameters.iter() {
24            trace!("adding parameter declaration for `{name:?}`");
25            let definition = VariableUse::new(&meta, name, &Vec::new());
26            result.add_definition(&definition);
27        }
28        result
29    }
30
31    /// Add the variable use corresponding to the definition of the variable.
32    fn add_definition(&mut self, var: &VariableUse) {
33        // TODO: Since we don't version components and signals, we may end up
34        // overwriting component initializations here. For example, in the
35        // following case the component initialization will be clobbered.
36        //
37        //   component c[2];
38        //   ...
39        //   c[0].in[0] <== 0;
40        //   c[1].in[1] <== 1;
41        //
42        // As long as the initialized component flows to a constraint it will
43        // not be flagged during side-effect analysis.
44        self.definitions.insert(var.name().clone(), var.clone());
45    }
46
47    /// Get the variable use corresponding to the definition of the variable.
48    pub fn get_definition(&self, var: &VariableName) -> Option<VariableUse> {
49        self.definitions.get(var).cloned()
50    }
51
52    pub fn definitions(&self) -> impl Iterator<Item = &VariableUse> {
53        self.definitions.values()
54    }
55
56    /// Add the variable use corresponding to the declaration of the variable.
57    fn add_declaration(&mut self, var: &VariableUse) {
58        self.declarations.insert(var.name().clone(), var.clone());
59    }
60
61    /// Get the variable use corresponding to the declaration of the variable.
62    pub fn get_declaration(&self, var: &VariableName) -> Option<VariableUse> {
63        self.declarations.get(var).cloned()
64    }
65
66    pub fn declarations(&self) -> impl Iterator<Item = &VariableUse> {
67        self.declarations.values()
68    }
69
70    /// Add a single step taint from source to sink.
71    fn add_taint_step(&mut self, source: &VariableName, sink: &VariableName) {
72        let sinks = self.taint_map.entry(source.clone()).or_default();
73        sinks.insert(sink.clone());
74    }
75
76    /// Returns variables tainted in a single step by `source`.
77    pub fn single_step_taint(&self, source: &VariableName) -> HashSet<VariableName> {
78        self.taint_map.get(source).cloned().unwrap_or_default()
79    }
80
81    /// Returns variables tainted in zero or more steps by `source`.
82    pub fn multi_step_taint(&self, source: &VariableName) -> HashSet<VariableName> {
83        let mut result = HashSet::new();
84        let mut update = HashSet::from([source.clone()]);
85        while !update.is_subset(&result) {
86            result.extend(update.iter().cloned());
87            update = update.iter().flat_map(|source| self.single_step_taint(source)).collect();
88        }
89        result
90    }
91
92    /// Returns true if the source taints any of the sinks.
93    pub fn taints_any(&self, source: &VariableName, sinks: &HashSet<VariableName>) -> bool {
94        self.multi_step_taint(source).iter().any(|sink| sinks.contains(sink))
95    }
96}
97
98pub fn run_taint_analysis(cfg: &Cfg) -> TaintAnalysis {
99    debug!("running taint analysis pass");
100    let mut result = TaintAnalysis::new(cfg.parameters());
101
102    use Expression::*;
103    use Statement::*;
104    for basic_block in cfg.iter() {
105        for stmt in basic_block.iter() {
106            trace!("visiting statement `{stmt:?}`");
107            match stmt {
108                Substitution { .. } => {
109                    // Variables read taint variables written by the statement.
110                    for sink in stmt.variables_written() {
111                        if !matches!(stmt, Substitution { rhe: Phi { .. }, .. }) {
112                            // Add the definition to the result.
113                            trace!("adding variable assignment for `{:?}`", sink.name());
114                            result.add_definition(sink);
115                        }
116                        for source in stmt.variables_read() {
117                            // Add each taint step to the result.
118                            trace!(
119                                "adding taint step with source `{:?}` and sink `{:?}`",
120                                source.name(),
121                                sink.name()
122                            );
123                            result.add_taint_step(source.name(), sink.name());
124                        }
125                    }
126                }
127                Declaration { meta, names, dimensions, .. } => {
128                    // Variables occurring in declarations taint the declared variable.
129                    for sink in names {
130                        result.add_declaration(&VariableUse::new(meta, sink, &Vec::new()));
131                        for size in dimensions {
132                            for source in size.variables_read() {
133                                result.add_taint_step(source.name(), sink)
134                            }
135                        }
136                    }
137                }
138                IfThenElse { cond, .. } => {
139                    // A variable which occurs in a non-constant condition taints all
140                    // variables assigned in the if-statement body.
141                    if cond.value().is_some() {
142                        continue;
143                    }
144                    let true_branch = cfg.get_true_branch(basic_block);
145                    let false_branch = cfg.get_false_branch(basic_block);
146                    for body in true_branch.iter().chain(false_branch.iter()) {
147                        // Add taint for assigned variables.
148                        for sink in body.variables_written() {
149                            for source in cond.variables_read() {
150                                // Add each taint step to the result.
151                                trace!(
152                                    "adding taint step with source `{:?}` and sink `{:?}`",
153                                    source.name(),
154                                    sink.name()
155                                );
156                                result.add_taint_step(source.name(), sink.name());
157                            }
158                        }
159                    }
160                }
161                // The following statement types do not propagate taint.
162                Assert { .. } | LogCall { .. } | Return { .. } | ConstraintEquality { .. } => {}
163            }
164        }
165    }
166    result
167}
168
169#[cfg(test)]
170mod tests {
171    use std::collections::HashMap;
172
173    use parser::parse_definition;
174    use program_structure::cfg::IntoCfg;
175    use program_structure::constants::Curve;
176    use program_structure::report::ReportCollection;
177
178    use super::*;
179
180    #[test]
181    fn test_taint_analysis() {
182        let src = r#"
183            template PointOnLine(k, m, n) {
184                signal input in[2];
185
186                var LOGK = log2(k);
187                var LOGK2 = log2(3 * k * k);
188                assert(3 * n + LOGK2 < 251);
189
190                component left = BigTemplate(n, k, 2 * n + LOGK + 1);
191                left.a <== in[0];
192                left.b <== in[1];
193
194                component right[m];
195                for (var i = 0; i < n; i++) {
196                    right[0] = SmallTemplate(k);
197                }
198            }
199        "#;
200
201        let mut taint_map = HashMap::new();
202        taint_map.insert(
203            "k",
204            HashSet::from([
205                "k".to_string(),
206                "LOGK".to_string(),
207                "LOGK2".to_string(),
208                "left".to_string(),
209                "right".to_string(),
210            ]),
211        );
212        taint_map.insert(
213            "m",
214            HashSet::from([
215                "m".to_string(),
216                "right".to_string(), // Since `right` is declared as an `m` dimensional array.
217            ]),
218        );
219        taint_map.insert(
220            "n",
221            HashSet::from([
222                "n".to_string(),
223                "i".to_string(), // Since the update `i++` depends on the condition `i < n`.
224                "left".to_string(),
225                "right".to_string(),
226            ]),
227        );
228        taint_map.insert("i", HashSet::from(["i".to_string(), "right".to_string()]));
229
230        validate_taint(src, &taint_map);
231    }
232
233    fn validate_taint(src: &str, taint_map: &HashMap<&str, HashSet<String>>) {
234        // Build CFG.
235        let mut reports = ReportCollection::new();
236        let cfg = parse_definition(src)
237            .unwrap()
238            .into_cfg(&Curve::default(), &mut reports)
239            .unwrap()
240            .into_ssa()
241            .unwrap();
242        assert!(reports.is_empty());
243
244        let taint_analysis = run_taint_analysis(&cfg);
245        for (source, expected_sinks) in taint_map {
246            let source = VariableName::from_string(source).with_version(0);
247            let sinks = taint_analysis
248                .multi_step_taint(&source)
249                .iter()
250                .map(|var| var.name().to_string())
251                .collect::<HashSet<_>>();
252            assert_eq!(&sinks, expected_sinks);
253        }
254    }
255}