mir_extractor/rules/
advanced_utils.rs1use once_cell::sync::Lazy;
7use regex::Regex;
8use std::collections::HashSet;
9
10pub fn detect_assignment(line: &str) -> Option<String> {
12 static RE_ASSIGN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(_\d+)\s*=").expect("assign regex"));
13
14 if let Some(caps) = RE_ASSIGN.captures(line) {
15 return Some(caps[1].to_string());
16 }
17
18 if line.starts_with("(*_") {
19 if let Some(end) = line.find(')') {
20 return Some(line[2..end].to_string());
21 }
22 }
23
24 None
25}
26
27pub fn extract_call_args(line: &str) -> Vec<String> {
29 static RE_ARG: Lazy<Regex> =
30 Lazy::new(|| Regex::new(r"(?:copy|move)\s+(_\d+)").expect("arg regex"));
31
32 RE_ARG
33 .captures_iter(line)
34 .map(|caps| caps[1].to_string())
35 .collect()
36}
37
38pub fn detect_len_call(line: &str) -> Option<(String, String)> {
40 static RE_LEN: Lazy<Regex> =
41 Lazy::new(|| Regex::new(r"^(_\d+)\s*=.*::len\((?:move|copy)\s+(_\d+)").expect("len regex"));
42
43 RE_LEN
44 .captures(line)
45 .map(|caps| (caps[1].to_string(), caps[2].to_string()))
46}
47
48pub fn detect_len_comparison(line: &str) -> Option<String> {
50 static RE_LEN_CMP: Lazy<Regex> = Lazy::new(|| {
51 Regex::new(r"(?:Gt|Lt|Ge|Le)\((?:move|copy)\s+(_\d+),\s*const").expect("len cmp regex")
52 });
53
54 RE_LEN_CMP.captures(line).map(|caps| caps[1].to_string())
55}
56
/// Reports whether `text` mentions the MIR local `var` (e.g. `_1`).
///
/// The check first looks for `var` itself and then for a handful of operand
/// spellings built from the local's number (`move _N`, `copy _N`, `_N.0`,
/// `_N.1`, `&_N`, `(*_N)`), so a caller may pass the local with or without
/// its leading underscore(s).
///
/// A match that ends in a digit must not be directly followed by another
/// digit. Without that boundary, looking for `_1` would also succeed on
/// `_10` or `_12`, which are different locals.
pub fn contains_var(text: &str, var: &str) -> bool {
    if occurs_with_digit_boundary(text, var) {
        return true;
    }

    let var_num = var.trim_start_matches('_');
    let operand_forms = [
        format!("move _{}", var_num),
        format!("copy _{}", var_num),
        format!("_{}.0", var_num),
        format!("_{}.1", var_num),
        format!("&_{}", var_num),
        format!("(*_{})", var_num),
    ];
    operand_forms
        .iter()
        .any(|form| occurs_with_digit_boundary(text, form))
}

/// Returns `true` when `needle` occurs in `text` and, if `needle` ends in an
/// ASCII digit, the occurrence is not immediately followed by another digit.
fn occurs_with_digit_boundary(text: &str, needle: &str) -> bool {
    let ends_in_digit = needle
        .as_bytes()
        .last()
        .map_or(false, |b| b.is_ascii_digit());
    if !ends_in_digit {
        // Nothing can extend the needle into a longer number.
        return text.contains(needle);
    }

    text.match_indices(needle).any(|(start, _)| {
        let following = text.as_bytes().get(start + needle.len());
        !following.map_or(false, |b| b.is_ascii_digit())
    })
}
71
72pub fn detect_const_string_assignment(line: &str) -> Option<(String, String)> {
74 static RE_CONST_STR: Lazy<Regex> = Lazy::new(|| {
75 Regex::new(r#"^(_\d+)\s*=\s*const\s*\"((?:\\.|[^\"])*)\""#).expect("const string regex")
76 });
77
78 RE_CONST_STR.captures(line).map(|caps| {
79 let var = caps[1].to_string();
80 let literal = caps[2].to_string();
81 (var, literal)
82 })
83}
84
85pub fn detect_var_alias(line: &str) -> Option<(String, String)> {
87 static RE_ALIAS: Lazy<Regex> =
88 Lazy::new(|| Regex::new(r"^(_\d+)\s*=\s*(?:copy|move)\s+(_\d+)").expect("alias regex"));
89
90 RE_ALIAS
91 .captures(line)
92 .map(|caps| (caps[1].to_string(), caps[2].to_string()))
93}
94
95#[allow(dead_code)]
97pub fn detect_drop_calls(line: &str) -> Vec<String> {
98 static RE_DROP: Lazy<Regex> =
99 Lazy::new(|| Regex::new(r"drop\(\s*(?:move\s+)?(_\d+)\s*\)").expect("drop call regex"));
100
101 RE_DROP
102 .captures_iter(line)
103 .map(|caps| caps[1].to_string())
104 .collect()
105}
106
107#[allow(dead_code)]
109pub fn detect_storage_dead_vars(line: &str) -> Vec<String> {
110 static RE_DEAD: Lazy<Regex> =
111 Lazy::new(|| Regex::new(r"StorageDead\(\s*(_\d+)\s*\)").expect("storage dead regex"));
112
113 RE_DEAD
114 .captures_iter(line)
115 .map(|caps| caps[1].to_string())
116 .collect()
117}
118
119pub fn extract_const_literals(line: &str) -> Vec<String> {
121 static RE_LITERAL: Lazy<Regex> =
122 Lazy::new(|| Regex::new(r#"const\s*\"((?:\\.|[^\"])*)\""#).expect("literal regex"));
123
124 RE_LITERAL
125 .captures_iter(line)
126 .map(|caps| caps[1].to_string())
127 .collect()
128}
129
/// Decodes the backslash escapes in the body of a Rust string literal.
///
/// Supported escapes:
///
/// * `\n`, `\r`, `\t`, `\0`, `\\`, `\"`, `\'`
/// * `\xNN` — exactly two hex digits with a value of at most `0x7F`
/// * `\u{...}` — one to six hex digits naming a valid Unicode scalar value
///
/// Any other escape, including a malformed `\x` / `\u` sequence, is handled
/// leniently: the backslash is dropped and the following character is kept
/// as-is. A lone backslash at the very end of `raw` is dropped.
pub fn unescape_rust_literal(raw: &str) -> String {
    let mut result = String::with_capacity(raw.len());
    let mut chars = raw.chars();
    while let Some(ch) = chars.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }

        let escaped = match chars.next() {
            Some(c) => c,
            // Trailing backslash with nothing to escape.
            None => break,
        };

        // `Some((decoded, extra_bytes))` for escapes that map to a different
        // character; `extra_bytes` is how much input after `escaped` belongs
        // to the escape sequence.
        let decoded = match escaped {
            'n' => Some(('\n', 0)),
            'r' => Some(('\r', 0)),
            't' => Some(('\t', 0)),
            '0' => Some(('\0', 0)),
            'x' => decode_hex_escape(chars.as_str()),
            'u' => decode_unicode_escape(chars.as_str()),
            _ => None,
        };

        match decoded {
            Some((value, extra_bytes)) => {
                result.push(value);
                chars = chars.as_str()[extra_bytes..].chars();
            }
            // `\\`, `\"`, `\'` and unknown/malformed escapes keep the escaped
            // character itself.
            None => result.push(escaped),
        }
    }
    result
}

/// Decodes the `NN` part of a `\xNN` escape found at the start of `rest`.
/// Returns the character and the number of bytes consumed.
fn decode_hex_escape(rest: &str) -> Option<(char, usize)> {
    let digits = rest.get(..2)?;
    if !digits.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }
    let value = u8::from_str_radix(digits, 16).ok()?;
    if value > 0x7F {
        return None;
    }
    Some((char::from(value), 2))
}

/// Decodes the `{...}` part of a `\u{...}` escape found at the start of
/// `rest`. Returns the character and the number of bytes consumed.
fn decode_unicode_escape(rest: &str) -> Option<(char, usize)> {
    let body = rest.strip_prefix('{')?;
    let close = body.find('}')?;
    let digits = &body[..close];
    let well_formed = !digits.is_empty()
        && digits.len() <= 6
        && digits.bytes().all(|b| b.is_ascii_hexdigit());
    if !well_formed {
        return None;
    }
    let value = u32::from_str_radix(digits, 16).ok()?;
    // Consumed: `{` + digits + `}`.
    char::from_u32(value).map(|c| (c, close + 2))
}
154
155pub fn pattern_is_high_risk(pattern: &str) -> bool {
157 static RE_NESTED_QUANTIFIERS: Lazy<Regex> = Lazy::new(|| {
158 Regex::new(r"\((?:[^()]|\\.)*[+*](?:[^()]|\\.)*\)[+*{]").expect("nested quantifier regex")
159 });
160
161 static RE_DOT_STAR_LOOP: Lazy<Regex> =
162 Lazy::new(|| Regex::new(r"\(\?:?\.\*(?:[^()]|\\.)*\)[+*{]").expect("dot-star loop regex"));
163
164 let simplified = pattern.replace(' ', "");
165 RE_NESTED_QUANTIFIERS.is_match(&simplified) || RE_DOT_STAR_LOOP.is_match(&simplified)
166}
167
/// Substrings whose presence in a line marks it as reading from an external
/// input (environment, command-line arguments, standard input, network or
/// file reads).
///
/// Matching is by plain substring, so the shorter entries also cover longer
/// paths that contain them (e.g. `env::var` also matches `std::env::var`).
pub const UNTRUSTED_PATTERNS: &[&str] = &[
    "env::var",
    "env::var_os",
    "env::args",
    "std::env::var",
    "std::env::args",
    "stdin",
    "TcpStream",
    "read_to_string",
    "read_to_end",
    "fs::read",
    "File::open",
];

/// Returns `true` when `line` contains at least one of the
/// [`UNTRUSTED_PATTERNS`] substrings.
pub fn is_untrusted_source(line: &str) -> bool {
    for needle in UNTRUSTED_PATTERNS {
        if line.contains(*needle) {
            return true;
        }
    }
    false
}
189
190pub fn is_derive_macro_function(func_name: &str) -> bool {
192 static RE_DERIVE: Lazy<Regex> = Lazy::new(|| {
193 Regex::new(r"<impl at [^>]+:\d+:\d+:\s*\d+:\d+>::").expect("derive macro regex")
194 });
195 RE_DERIVE.is_match(func_name)
196}
197
/// Returns `true` when `func_name` ends with one of a fixed set of common
/// trait-method path segments (`::eq`, `::ne`, `::partial_cmp`, `::cmp`,
/// `::hash`, `::fmt`, `::clone`, `::default`, `::drop`).
///
/// Only the end of the name is examined; `_func_signature` is accepted for
/// interface compatibility and is not consulted.
pub fn is_safe_trait_method(func_name: &str, _func_signature: &str) -> bool {
    const SAFE_SUFFIXES: [&str; 9] = [
        "::eq",
        "::ne",
        "::partial_cmp",
        "::cmp",
        "::hash",
        "::fmt",
        "::clone",
        "::default",
        "::drop",
    ];

    for suffix in SAFE_SUFFIXES.iter() {
        if func_name.ends_with(*suffix) {
            return true;
        }
    }
    false
}
213
214#[derive(Default)]
216pub struct TaintTracker {
217 pub tainted: HashSet<String>,
218 pub taint_roots: std::collections::HashMap<String, String>,
219 pub sanitized_roots: HashSet<String>,
220 pub sources: std::collections::HashMap<String, String>,
221}
222
223impl TaintTracker {
224 pub fn mark_source(&mut self, var: &str, origin: &str) {
225 let var = var.to_string();
226 self.tainted.insert(var.clone());
227 self.taint_roots.insert(var.clone(), var.clone());
228 self.sources
229 .entry(var)
230 .or_insert_with(|| origin.trim().to_string());
231 }
232
233 pub fn mark_alias(&mut self, dest: &str, source: &str) {
234 if !self.tainted.contains(source) {
235 return;
236 }
237
238 if let Some(root) = self.taint_roots.get(source).cloned() {
239 self.tainted.insert(dest.to_string());
240 self.taint_roots.insert(dest.to_string(), root);
241 }
242 }
243
244 pub fn find_tainted_in_line(&self, line: &str) -> Option<String> {
245 self.tainted
246 .iter()
247 .find(|var| contains_var(line, var))
248 .cloned()
249 }
250
251 pub fn is_sanitized(&self, var: &str) -> bool {
252 if let Some(root) = self.taint_roots.get(var) {
253 self.sanitized_roots.contains(root)
254 } else {
255 false
256 }
257 }
258
259 pub fn sanitize_root(&mut self, root: &str) {
260 self.sanitized_roots.insert(root.to_string());
261 }
262}