Skip to main content

agentshield/analysis/
cross_file.rs

1//! Cross-file sanitizer-aware validation tracking.
2//!
3//! Runs after parsing, before detection. When a function is only ever called
4//! with sanitized arguments, downgrades its parameters' `ArgumentSource` from
5//! tainted to `Sanitized`. This eliminates false positives from internal
6//! helper functions that receive already-validated input from their callers.
7
8use std::collections::HashMap;
9use std::path::PathBuf;
10
11use crate::ir::ArgumentSource;
12use crate::parser::ParsedFile;
13
/// Function (or method) names that are treated as sanitizers, by category.
///
/// Entries are compared case-sensitively against either the full callee name
/// or the segment after its last `.` (see [`is_sanitizer`]).
static SANITIZER_NAMES: &[&str] = &[
    // Path sanitizers
    "validatePath",
    "sanitizePath",
    "normalizePath",
    "resolvePath",
    "canonicalizePath",
    "realpath",
    // Node.js path module (method part after dot)
    "resolve",
    "normalize",
    // Python path functions
    "abspath",
    "normpath",
    // URL sanitizers
    "parseUrl",
    "urlparse",
    // Type coercion
    "parseInt",
    "parseFloat",
    "Number",
    "int",
    "float",
    "str",
];

/// Check if a function name (or the method part of `obj.method`) is a sanitizer.
///
/// A name is accepted when any of the following holds:
///
/// 1. it appears verbatim in [`SANITIZER_NAMES`];
/// 2. the segment after its last `.` appears in [`SANITIZER_NAMES`]
///    (`"path.resolve"` → `"resolve"`);
/// 3. case-insensitively, it mentions `path` or `url` and contains the word
///    `validate` that is not merely the tail of `invalidate`
///    (`"validate_path"` matches, `"invalidatePathCache"` does not).
pub fn is_sanitizer(name: &str) -> bool {
    // `rsplit` always yields at least one item; for a dot-free name that item
    // is the name itself, so the fallback is never actually taken.
    let method = name.rsplit('.').next().unwrap_or(name);
    if SANITIZER_NAMES.contains(&name) || SANITIZER_NAMES.contains(&method) {
        return true;
    }

    // Heuristic for project-specific validators such as `validate_path`.
    let lower = name.to_lowercase();
    let mentions_target = lower.contains("path") || lower.contains("url");

    // "invalidate" contains "validate" but means the opposite of checking the
    // input, so an occurrence directly preceded by "in" does not count.
    mentions_target
        && lower
            .match_indices("validate")
            .any(|(start, _)| !lower[..start].ends_with("in"))
}
57
/// Result of cross-file sanitization analysis.
///
/// The default value represents "nothing was downgraded".
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CrossFileResult {
    /// Number of operations whose ArgumentSource was downgraded.
    pub downgraded_count: usize,
    /// Functions determined to receive only sanitized input.
    pub sanitized_functions: Vec<String>,
}
66
67/// Perform cross-file sanitizer-aware analysis on parsed files.
68///
69/// For each function definition, checks if ALL discovered call sites pass
70/// sanitized (or literal) arguments for each parameter. If so, downgrades
71/// the function's operations from tainted to `Sanitized`.
72///
73/// Conservative: exported functions with zero discovered call sites keep
74/// their parameters tainted.
75pub fn apply_cross_file_sanitization(
76    parsed_files: &mut [(PathBuf, ParsedFile)],
77) -> CrossFileResult {
78    let mut downgraded_count = 0;
79    let mut sanitized_functions = Vec::new();
80
81    // Phase 1: Build function definition map.
82    // Key: function name → (file index, param names)
83    let mut func_defs: HashMap<String, Vec<(usize, Vec<String>, bool)>> = HashMap::new();
84    for (idx, (_, parsed)) in parsed_files.iter().enumerate() {
85        for def in &parsed.function_defs {
86            func_defs.entry(def.name.clone()).or_default().push((
87                idx,
88                def.params.clone(),
89                def.is_exported,
90            ));
91        }
92    }
93
94    // Phase 2: Build call-site map.
95    // Key: callee name → Vec of (argument sources)
96    let mut call_sites: HashMap<String, Vec<Vec<ArgumentSource>>> = HashMap::new();
97    for (_, parsed) in parsed_files.iter() {
98        for cs in &parsed.call_sites {
99            call_sites
100                .entry(cs.callee.clone())
101                .or_default()
102                .push(cs.arguments.clone());
103        }
104    }
105
106    // Phase 3: Determine which functions have all-sanitized parameters.
107    // For each function with a definition AND call sites, check if every
108    // call site passes safe (Literal or Sanitized) values for each param.
109    let mut params_to_downgrade: Vec<(usize, String, String)> = Vec::new(); // (file_idx, param_name, sanitizer)
110
111    for (func_name, defs) in &func_defs {
112        let sites = match call_sites.get(func_name) {
113            Some(s) if !s.is_empty() => s,
114            _ => {
115                // No discovered call sites. If exported, stay conservative.
116                continue;
117            }
118        };
119
120        for (file_idx, params, _is_exported) in defs {
121            // Check each parameter position
122            for (param_idx, param_name) in params.iter().enumerate() {
123                let all_safe = sites.iter().all(|args| {
124                    args.get(param_idx)
125                        .map(|arg| !arg.is_tainted())
126                        .unwrap_or(false) // Missing arg = can't prove safe
127                });
128
129                if all_safe {
130                    params_to_downgrade.push((*file_idx, param_name.clone(), func_name.clone()));
131                }
132            }
133        }
134    }
135
136    // Phase 4: Downgrade operations in the target functions.
137    for (file_idx, param_name, func_name) in &params_to_downgrade {
138        let (_, parsed) = &mut parsed_files[*file_idx];
139        let sanitizer_label = format!("caller passes sanitized value to {func_name}");
140
141        let sanitized = ArgumentSource::Sanitized {
142            sanitizer: sanitizer_label.clone(),
143        };
144
145        // Downgrade matching ArgumentSource::Parameter in all operation types
146        for cmd in &mut parsed.commands {
147            if matches!(&cmd.command_arg, ArgumentSource::Parameter { name } if name == param_name)
148            {
149                cmd.command_arg = sanitized.clone();
150                downgraded_count += 1;
151            }
152        }
153        for op in &mut parsed.file_operations {
154            if matches!(&op.path_arg, ArgumentSource::Parameter { name } if name == param_name) {
155                op.path_arg = sanitized.clone();
156                downgraded_count += 1;
157            }
158        }
159        for op in &mut parsed.network_operations {
160            if matches!(&op.url_arg, ArgumentSource::Parameter { name } if name == param_name) {
161                op.url_arg = sanitized.clone();
162                downgraded_count += 1;
163            }
164        }
165        for op in &mut parsed.dynamic_exec {
166            if matches!(&op.code_arg, ArgumentSource::Parameter { name } if name == param_name) {
167                op.code_arg = sanitized.clone();
168                downgraded_count += 1;
169            }
170        }
171
172        if !sanitized_functions.contains(func_name) {
173            sanitized_functions.push(func_name.clone());
174        }
175    }
176
177    CrossFileResult {
178        downgraded_count,
179        sanitized_functions,
180    }
181}
182
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ir::execution_surface::{FileOpType, FileOperation};
    use crate::ir::SourceLocation;
    use crate::parser::{CallSite, FunctionDef};

    /// Source location at column 0 with no end position.
    fn loc(file: &str, line: usize) -> SourceLocation {
        SourceLocation {
            file: PathBuf::from(file),
            line,
            column: 0,
            end_line: None,
            end_column: None,
        }
    }

    /// Argument that went through the sanitizer `sanitizer`.
    fn sanitized_by(sanitizer: &str) -> ArgumentSource {
        ArgumentSource::Sanitized {
            sanitizer: sanitizer.into(),
        }
    }

    /// Argument that is the raw parameter `name`.
    fn param(name: &str) -> ArgumentSource {
        ArgumentSource::Parameter { name: name.into() }
    }

    /// Call of `callee` from `caller` at `file:line`.
    fn call(
        callee: &str,
        arguments: Vec<ArgumentSource>,
        caller: &str,
        file: &str,
        line: usize,
    ) -> CallSite {
        CallSite {
            callee: callee.into(),
            arguments,
            caller: Some(caller.into()),
            location: loc(file, line),
        }
    }

    /// Exported function definition on line 1 of `file`.
    fn exported_def(name: &str, params: &[&str], file: &str) -> FunctionDef {
        FunctionDef {
            name: name.into(),
            params: params.iter().map(|p| p.to_string()).collect(),
            is_exported: true,
            location: loc(file, 1),
        }
    }

    /// File operation whose path is the raw parameter `param_name`.
    fn file_op(param_name: &str, operation: FileOpType, file: &str, line: usize) -> FileOperation {
        FileOperation {
            path_arg: param(param_name),
            operation,
            location: loc(file, line),
        }
    }

    #[test]
    fn sanitizer_names_recognized() {
        let accepted = [
            "validatePath",
            "path.resolve",
            "os.path.realpath",
            "parseInt",
            "urlparse",
        ];
        for name in accepted.iter() {
            assert!(is_sanitizer(name));
        }

        let rejected = ["processData", "readFile"];
        for name in rejected.iter() {
            assert!(!is_sanitizer(name));
        }
    }

    #[test]
    fn custom_validate_path_recognized() {
        assert!(is_sanitizer("validate_path"));
        assert!(is_sanitizer("validateUrl"));
    }

    #[test]
    fn cross_file_downgrade() {
        // index.ts calls readFileContent with an already-sanitized argument.
        let mut caller_file = ParsedFile::default();
        caller_file.call_sites.push(call(
            "readFileContent",
            vec![sanitized_by("validatePath")],
            "handleRead",
            "index.ts",
            5,
        ));

        // lib.ts defines readFileContent and reads from its filePath param.
        let mut lib_file = ParsedFile::default();
        lib_file
            .function_defs
            .push(exported_def("readFileContent", &["filePath"], "lib.ts"));
        lib_file
            .file_operations
            .push(file_op("filePath", FileOpType::Read, "lib.ts", 3));

        let mut files = vec![
            (PathBuf::from("index.ts"), caller_file),
            (PathBuf::from("lib.ts"), lib_file),
        ];

        let result = apply_cross_file_sanitization(&mut files);

        assert_eq!(result.downgraded_count, 1);
        assert_eq!(result.sanitized_functions, vec!["readFileContent"]);

        // The operation inside lib.ts is now marked sanitized.
        let downgraded = &files[1].1.file_operations[0].path_arg;
        assert!(!downgraded.is_tainted());
        assert!(matches!(downgraded, ArgumentSource::Sanitized { .. }));
    }

    #[test]
    fn no_downgrade_when_unsanitized_caller_exists() {
        // One safe call site and one tainted call site for the same callee.
        let mut caller_file = ParsedFile::default();
        caller_file.call_sites.push(call(
            "readFile",
            vec![sanitized_by("validatePath")],
            "safeHandler",
            "safe.ts",
            5,
        ));
        caller_file.call_sites.push(call(
            "readFile",
            vec![param("userInput")],
            "unsafeHandler",
            "safe.ts",
            10,
        ));

        let mut lib_file = ParsedFile::default();
        lib_file
            .function_defs
            .push(exported_def("readFile", &["path"], "lib.ts"));
        lib_file
            .file_operations
            .push(file_op("path", FileOpType::Read, "lib.ts", 3));

        let mut files = vec![
            (PathBuf::from("safe.ts"), caller_file),
            (PathBuf::from("lib.ts"), lib_file),
        ];

        let result = apply_cross_file_sanitization(&mut files);

        assert_eq!(result.downgraded_count, 0);
        // The operation must remain tainted.
        assert!(files[1].1.file_operations[0].path_arg.is_tainted());
    }

    #[test]
    fn no_downgrade_for_exported_with_no_callers() {
        let mut lib_file = ParsedFile::default();
        lib_file
            .function_defs
            .push(exported_def("dangerousFunc", &["input"], "lib.ts"));
        lib_file
            .file_operations
            .push(file_op("input", FileOpType::Write, "lib.ts", 3));

        let mut files = vec![(PathBuf::from("lib.ts"), lib_file)];

        let result = apply_cross_file_sanitization(&mut files);

        assert_eq!(result.downgraded_count, 0);
        assert!(files[0].1.file_operations[0].path_arg.is_tainted());
    }

    #[test]
    fn downgrade_only_matching_params() {
        // Two parameters; only the first is sanitized at the call site.
        let mut caller_file = ParsedFile::default();
        caller_file.call_sites.push(call(
            "copyFile",
            vec![sanitized_by("validatePath"), param("rawDest")],
            "handler",
            "index.ts",
            5,
        ));

        // One file operation per parameter.
        let mut lib_file = ParsedFile::default();
        lib_file
            .function_defs
            .push(exported_def("copyFile", &["src", "dest"], "lib.ts"));
        lib_file
            .file_operations
            .push(file_op("src", FileOpType::Read, "lib.ts", 3));
        lib_file
            .file_operations
            .push(file_op("dest", FileOpType::Write, "lib.ts", 4));

        let mut files = vec![
            (PathBuf::from("index.ts"), caller_file),
            (PathBuf::from("lib.ts"), lib_file),
        ];

        let result = apply_cross_file_sanitization(&mut files);

        assert_eq!(result.downgraded_count, 1); // Only src
        let ops = &files[1].1.file_operations;
        assert!(!ops[0].path_arg.is_tainted()); // src: safe
        assert!(ops[1].path_arg.is_tainted()); // dest: still tainted
    }
}