1use std::path::PathBuf;
7
8use super::data_surface::*;
9use super::execution_surface::*;
10use super::tool_surface::ToolSurface;
11use super::ArgumentSource;
12use super::SourceLocation;
13
14pub fn build_data_surface(tools: &[ToolSurface], execution: &ExecutionSurface) -> DataSurface {
20 let sources = collect_sources(tools, execution);
21 let sinks = collect_sinks(execution);
22 let taint_paths = build_taint_paths(&sources, execution);
23
24 DataSurface {
25 sources,
26 sinks,
27 taint_paths,
28 }
29}
30
31fn collect_sources(tools: &[ToolSurface], execution: &ExecutionSurface) -> Vec<TaintSource> {
33 let mut sources = Vec::new();
34
35 for tool in tools {
37 let location = tool.defined_at.clone().unwrap_or_else(|| SourceLocation {
38 file: PathBuf::from("<unknown>"),
39 line: 0,
40 column: 0,
41 end_line: None,
42 end_column: None,
43 });
44
45 if let Some(ref schema) = tool.input_schema {
46 if let Some(props) = schema.get("properties").and_then(|v| v.as_object()) {
47 for param_name in props.keys() {
48 sources.push(TaintSource {
49 source_type: TaintSourceType::ToolArgument,
50 description: format!("Tool '{}' parameter '{}'", tool.name, param_name),
51 location: location.clone(),
52 });
53 }
54 }
55 }
56 }
57
58 for env in &execution.env_accesses {
60 let var_desc = match &env.var_name {
61 ArgumentSource::Literal(name) => name.clone(),
62 ArgumentSource::EnvVar { name } => name.clone(),
63 ArgumentSource::Parameter { name } => format!("(dynamic: {})", name),
64 _ => "(dynamic)".to_string(),
65 };
66 sources.push(TaintSource {
67 source_type: TaintSourceType::EnvVariable,
68 description: format!("Environment variable '{}'", var_desc),
69 location: env.location.clone(),
70 });
71 }
72
73 sources
74}
75
76fn collect_sinks(execution: &ExecutionSurface) -> Vec<TaintSink> {
78 let mut sinks = Vec::new();
79
80 for cmd in &execution.commands {
81 sinks.push(TaintSink {
82 sink_type: TaintSinkType::ProcessExec,
83 description: format!("Process execution via {}", cmd.function),
84 location: cmd.location.clone(),
85 });
86 }
87
88 for net in &execution.network_operations {
89 sinks.push(TaintSink {
90 sink_type: TaintSinkType::HttpRequest,
91 description: format!("HTTP request via {}", net.function),
92 location: net.location.clone(),
93 });
94 }
95
96 for file_op in &execution.file_operations {
97 if matches!(file_op.operation, FileOpType::Write) {
98 sinks.push(TaintSink {
99 sink_type: TaintSinkType::FileWrite,
100 description: "File write operation".to_string(),
101 location: file_op.location.clone(),
102 });
103 }
104 }
105
106 for dyn_exec in &execution.dynamic_exec {
107 sinks.push(TaintSink {
108 sink_type: TaintSinkType::DynamicEval,
109 description: format!("Dynamic code execution via {}", dyn_exec.function),
110 location: dyn_exec.location.clone(),
111 });
112 }
113
114 sinks
115}
116
117fn build_taint_paths(sources: &[TaintSource], execution: &ExecutionSurface) -> Vec<TaintPath> {
122 let mut paths = Vec::new();
123
124 for cmd in &execution.commands {
126 if cmd.command_arg.is_tainted() {
127 let source = resolve_source(sources, &cmd.command_arg, &cmd.location);
128 paths.push(TaintPath {
129 source,
130 sink: TaintSink {
131 sink_type: TaintSinkType::ProcessExec,
132 description: format!("Process execution via {}", cmd.function),
133 location: cmd.location.clone(),
134 },
135 through: vec![],
136 confidence: confidence_for_arg(&cmd.command_arg),
137 });
138 }
139 }
140
141 for net in &execution.network_operations {
143 if net.url_arg.is_tainted() {
144 let source = resolve_source(sources, &net.url_arg, &net.location);
145 paths.push(TaintPath {
146 source,
147 sink: TaintSink {
148 sink_type: TaintSinkType::HttpRequest,
149 description: format!("HTTP request via {}", net.function),
150 location: net.location.clone(),
151 },
152 through: vec![],
153 confidence: confidence_for_arg(&net.url_arg),
154 });
155 }
156 }
157
158 for file_op in &execution.file_operations {
160 if matches!(file_op.operation, FileOpType::Write) && file_op.path_arg.is_tainted() {
161 let source = resolve_source(sources, &file_op.path_arg, &file_op.location);
162 paths.push(TaintPath {
163 source,
164 sink: TaintSink {
165 sink_type: TaintSinkType::FileWrite,
166 description: "File write operation".to_string(),
167 location: file_op.location.clone(),
168 },
169 through: vec![],
170 confidence: confidence_for_arg(&file_op.path_arg),
171 });
172 }
173 }
174
175 for dyn_exec in &execution.dynamic_exec {
177 if dyn_exec.code_arg.is_tainted() {
178 let source = resolve_source(sources, &dyn_exec.code_arg, &dyn_exec.location);
179 paths.push(TaintPath {
180 source,
181 sink: TaintSink {
182 sink_type: TaintSinkType::DynamicEval,
183 description: format!("Dynamic code execution via {}", dyn_exec.function),
184 location: dyn_exec.location.clone(),
185 },
186 through: vec![],
187 confidence: confidence_for_arg(&dyn_exec.code_arg),
188 });
189 }
190 }
191
192 paths
193}
194
195fn resolve_source(
200 sources: &[TaintSource],
201 arg: &ArgumentSource,
202 fallback_location: &SourceLocation,
203) -> TaintSource {
204 match arg {
205 ArgumentSource::Parameter { name } => {
206 if let Some(found) = sources.iter().find(|s| {
208 s.source_type == TaintSourceType::ToolArgument && s.description.contains(name)
209 }) {
210 return found.clone();
211 }
212 TaintSource {
213 source_type: TaintSourceType::ToolArgument,
214 description: format!("Function parameter '{}'", name),
215 location: fallback_location.clone(),
216 }
217 }
218 ArgumentSource::EnvVar { name } => {
219 if let Some(found) = sources.iter().find(|s| {
220 s.source_type == TaintSourceType::EnvVariable && s.description.contains(name)
221 }) {
222 return found.clone();
223 }
224 TaintSource {
225 source_type: TaintSourceType::EnvVariable,
226 description: format!("Environment variable '{}'", name),
227 location: fallback_location.clone(),
228 }
229 }
230 ArgumentSource::Interpolated => TaintSource {
231 source_type: TaintSourceType::ToolArgument,
232 description: "Interpolated string (potentially user-controlled)".to_string(),
233 location: fallback_location.clone(),
234 },
235 ArgumentSource::Unknown => TaintSource {
236 source_type: TaintSourceType::ToolArgument,
237 description: "Unknown source (could not determine origin)".to_string(),
238 location: fallback_location.clone(),
239 },
240 ArgumentSource::Literal(_) | ArgumentSource::Sanitized { .. } => TaintSource {
242 source_type: TaintSourceType::ToolArgument,
243 description: "Unexpected safe source".to_string(),
244 location: fallback_location.clone(),
245 },
246 }
247}
248
249fn confidence_for_arg(arg: &ArgumentSource) -> f32 {
251 match arg {
252 ArgumentSource::Parameter { .. } => 0.9,
253 ArgumentSource::Interpolated => 0.8,
254 ArgumentSource::EnvVar { .. } => 0.7,
255 ArgumentSource::Unknown => 0.5,
256 ArgumentSource::Literal(_) | ArgumentSource::Sanitized { .. } => 0.1,
257 }
258}
259
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ir::tool_surface::ToolSurface;
    use crate::ir::ArgumentSource;
    use serde_json::json;
    use std::path::PathBuf;

    // ---- fixture builders -------------------------------------------------

    /// Location at `line` of the synthetic file `test.py`.
    fn make_location(line: usize) -> SourceLocation {
        SourceLocation {
            file: PathBuf::from("test.py"),
            line,
            column: 0,
            end_line: None,
            end_column: None,
        }
    }

    /// Tool defined at line 1 whose schema declares one string property per
    /// entry of `params`.
    fn make_tool(name: &str, params: &[&str]) -> ToolSurface {
        let mut schema_props = serde_json::Map::new();
        for param in params {
            schema_props.insert(param.to_string(), json!({"type": "string"}));
        }
        ToolSurface {
            name: name.to_string(),
            description: Some("test tool".to_string()),
            input_schema: Some(json!({"properties": schema_props})),
            output_schema: None,
            declared_permissions: vec![],
            defined_at: Some(make_location(1)),
        }
    }

    /// Argument that originates from the function parameter `name`.
    fn param(name: &str) -> ArgumentSource {
        ArgumentSource::Parameter {
            name: name.to_string(),
        }
    }

    /// Command invocation of `function` with `arg`, located at `line`.
    fn command(function: &str, arg: ArgumentSource, line: usize) -> CommandInvocation {
        CommandInvocation {
            function: function.to_string(),
            command_arg: arg,
            location: make_location(line),
        }
    }

    /// `requests.get` call (GET, no payload) with the given URL argument.
    fn http_get(url_arg: ArgumentSource, line: usize) -> NetworkOperation {
        NetworkOperation {
            function: "requests.get".to_string(),
            url_arg,
            method: Some("GET".to_string()),
            sends_data: false,
            location: make_location(line),
        }
    }

    /// File operation of kind `operation` on a path taken from parameter
    /// `path_param`.
    fn file_op(operation: FileOpType, path_param: &str, line: usize) -> FileOperation {
        FileOperation {
            operation,
            path_arg: param(path_param),
            location: make_location(line),
        }
    }

    /// `eval` call whose code argument has an unknown origin.
    fn eval_call(line: usize) -> DynamicExec {
        DynamicExec {
            function: "eval".to_string(),
            code_arg: ArgumentSource::Unknown,
            location: make_location(line),
        }
    }

    /// Sensitive access to the literally named environment variable `name`.
    fn literal_env(name: &str, line: usize) -> EnvAccess {
        EnvAccess {
            var_name: ArgumentSource::Literal(name.to_string()),
            is_sensitive: true,
            location: make_location(line),
        }
    }

    /// Asserts that `path` carries exactly the `expected` confidence score.
    fn assert_confidence(path: &TaintPath, expected: f32) {
        assert!((path.confidence - expected).abs() < f32::EPSILON);
    }

    // ---- sources ----------------------------------------------------------

    /// Every schema property of a tool becomes a `ToolArgument` source.
    #[test]
    fn test_sources_from_tool_parameters() {
        let surface = build_data_surface(
            &[make_tool("run_cmd", &["command", "cwd"])],
            &ExecutionSurface::default(),
        );
        let sources = &surface.sources;

        assert_eq!(sources.len(), 2);
        assert!(sources
            .iter()
            .all(|s| s.source_type == TaintSourceType::ToolArgument));
        for needle in &["command", "cwd"] {
            assert!(sources.iter().any(|s| s.description.contains(*needle)));
        }
    }

    /// Environment accesses become `EnvVariable` sources naming the variable.
    #[test]
    fn test_sources_from_env_accesses() {
        let execution = ExecutionSurface {
            env_accesses: vec![literal_env("API_KEY", 10)],
            ..Default::default()
        };

        let surface = build_data_surface(&[], &execution);

        assert_eq!(surface.sources.len(), 1);
        let only = &surface.sources[0];
        assert_eq!(only.source_type, TaintSourceType::EnvVariable);
        assert!(only.description.contains("API_KEY"));
    }

    // ---- sinks ------------------------------------------------------------

    /// A command invocation is reported as a `ProcessExec` sink.
    #[test]
    fn test_sinks_from_commands() {
        let execution = ExecutionSurface {
            commands: vec![command("subprocess.run", param("cmd"), 5)],
            ..Default::default()
        };

        let surface = build_data_surface(&[], &execution);

        assert_eq!(surface.sinks.len(), 1);
        let only = &surface.sinks[0];
        assert_eq!(only.sink_type, TaintSinkType::ProcessExec);
        assert!(only.description.contains("subprocess.run"));
    }

    /// A network operation is reported as an `HttpRequest` sink.
    #[test]
    fn test_sinks_from_network_operations() {
        let execution = ExecutionSurface {
            network_operations: vec![http_get(ArgumentSource::Interpolated, 8)],
            ..Default::default()
        };

        let surface = build_data_surface(&[], &execution);

        assert_eq!(surface.sinks.len(), 1);
        assert_eq!(surface.sinks[0].sink_type, TaintSinkType::HttpRequest);
    }

    /// Of a read and a write, only the write shows up as a sink.
    #[test]
    fn test_sinks_from_file_write_only() {
        let execution = ExecutionSurface {
            file_operations: vec![
                file_op(FileOpType::Read, "path", 3),
                file_op(FileOpType::Write, "out", 7),
            ],
            ..Default::default()
        };

        let surface = build_data_surface(&[], &execution);

        assert_eq!(surface.sinks.len(), 1);
        let only = &surface.sinks[0];
        assert_eq!(only.sink_type, TaintSinkType::FileWrite);
        assert_eq!(only.location.line, 7);
    }

    /// A dynamic-exec call is reported as a `DynamicEval` sink.
    #[test]
    fn test_sinks_from_dynamic_exec() {
        let execution = ExecutionSurface {
            dynamic_exec: vec![eval_call(12)],
            ..Default::default()
        };

        let surface = build_data_surface(&[], &execution);

        assert_eq!(surface.sinks.len(), 1);
        assert_eq!(surface.sinks[0].sink_type, TaintSinkType::DynamicEval);
    }

    // ---- taint paths ------------------------------------------------------

    /// A tool parameter flowing into a command yields one direct 0.9 path.
    #[test]
    fn test_taint_path_from_parameter_to_command() {
        let tools = vec![make_tool("exec_tool", &["command"])];
        let execution = ExecutionSurface {
            commands: vec![command("subprocess.run", param("command"), 10)],
            ..Default::default()
        };

        let surface = build_data_surface(&tools, &execution);

        assert_eq!(surface.taint_paths.len(), 1);
        let path = &surface.taint_paths[0];
        assert_eq!(path.source.source_type, TaintSourceType::ToolArgument);
        assert!(path.source.description.contains("command"));
        assert_eq!(path.sink.sink_type, TaintSinkType::ProcessExec);
        assert_confidence(path, 0.9);
        assert!(path.through.is_empty());
    }

    /// A literal command is still a sink but produces no taint path.
    #[test]
    fn test_no_taint_path_for_literal() {
        let execution = ExecutionSurface {
            commands: vec![command(
                "subprocess.run",
                ArgumentSource::Literal("ls -la".to_string()),
                5,
            )],
            ..Default::default()
        };

        let surface = build_data_surface(&[], &execution);

        assert_eq!(surface.sinks.len(), 1);
        assert!(
            surface.taint_paths.is_empty(),
            "literal args should not produce taint paths"
        );
    }

    /// A sanitized command is still a sink but produces no taint path.
    #[test]
    fn test_no_taint_path_for_sanitized() {
        let sanitized = ArgumentSource::Sanitized {
            sanitizer: "validateCommand".to_string(),
        };
        let execution = ExecutionSurface {
            commands: vec![command("subprocess.run", sanitized, 5)],
            ..Default::default()
        };

        let surface = build_data_surface(&[], &execution);

        assert_eq!(surface.sinks.len(), 1);
        assert!(
            surface.taint_paths.is_empty(),
            "sanitized args should not produce taint paths"
        );
    }

    /// Interpolated arguments score 0.8.
    #[test]
    fn test_interpolated_confidence() {
        let execution = ExecutionSurface {
            network_operations: vec![http_get(ArgumentSource::Interpolated, 15)],
            ..Default::default()
        };

        let surface = build_data_surface(&[], &execution);

        assert_eq!(surface.taint_paths.len(), 1);
        assert_confidence(&surface.taint_paths[0], 0.8);
    }

    /// Environment-variable arguments score 0.7.
    #[test]
    fn test_envvar_confidence() {
        let from_env = ArgumentSource::EnvVar {
            name: "CMD".to_string(),
        };
        let execution = ExecutionSurface {
            commands: vec![command("os.system", from_env, 3)],
            ..Default::default()
        };

        let surface = build_data_surface(&[], &execution);

        assert_eq!(surface.taint_paths.len(), 1);
        assert_confidence(&surface.taint_paths[0], 0.7);
    }

    /// Arguments of unknown origin score 0.5.
    #[test]
    fn test_unknown_confidence() {
        let execution = ExecutionSurface {
            dynamic_exec: vec![eval_call(20)],
            ..Default::default()
        };

        let surface = build_data_surface(&[], &execution);

        assert_eq!(surface.taint_paths.len(), 1);
        assert_confidence(&surface.taint_paths[0], 0.5);
    }

    // ---- edge cases and combinations --------------------------------------

    /// A tool without an input schema contributes nothing at all.
    #[test]
    fn test_tool_without_schema_produces_no_sources() {
        let schemaless = ToolSurface {
            name: "no_schema".to_string(),
            description: None,
            input_schema: None,
            output_schema: None,
            declared_permissions: vec![],
            defined_at: None,
        };

        let surface = build_data_surface(&[schemaless], &ExecutionSurface::default());

        assert!(surface.sources.is_empty());
        assert!(surface.sinks.is_empty());
        assert!(surface.taint_paths.is_empty());
    }

    /// Mixed input: two sources (tool param + env var), two sinks (command +
    /// HTTP), but only the parameter-fed HTTP request forms a taint path.
    #[test]
    fn test_combined_sources_sinks_paths() {
        let tools = vec![make_tool("fetch", &["url"])];
        let execution = ExecutionSurface {
            commands: vec![command(
                "subprocess.run",
                ArgumentSource::Literal("echo hi".to_string()),
                5,
            )],
            network_operations: vec![http_get(param("url"), 10)],
            env_accesses: vec![literal_env("TOKEN", 2)],
            ..Default::default()
        };

        let surface = build_data_surface(&tools, &execution);

        assert_eq!(surface.sources.len(), 2);
        assert_eq!(surface.sinks.len(), 2);
        assert_eq!(surface.taint_paths.len(), 1);
        assert_eq!(
            surface.taint_paths[0].sink.sink_type,
            TaintSinkType::HttpRequest
        );
    }

    /// End-to-end check against the `vuln_cmd_inject` fixture loaded through
    /// the MCP adapter.
    #[test]
    fn test_data_surface_from_vuln_fixture() {
        use crate::adapter::Adapter;

        let fixture_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("tests/fixtures/mcp_servers/vuln_cmd_inject");
        let mcp = crate::adapter::mcp::McpAdapter;
        let targets = mcp.load(&fixture_dir, false).unwrap();
        assert_eq!(targets.len(), 1);

        let data = &targets[0].data;

        assert!(
            !data.sinks.is_empty(),
            "vuln_cmd_inject should produce taint sinks"
        );

        assert!(
            data.sinks
                .iter()
                .any(|s| s.sink_type == TaintSinkType::ProcessExec),
            "expected ProcessExec sink from subprocess usage"
        );

        assert!(
            !data.taint_paths.is_empty(),
            "vuln_cmd_inject should produce taint paths from parameter to subprocess"
        );

        assert!(
            data.taint_paths.iter().any(|p| p.confidence >= 0.8),
            "expected high-confidence taint path"
        );
    }
}