Skip to main content

agentshield/adapter/
mcp.rs

1use std::path::{Path, PathBuf};
2
3use crate::analysis::cross_file::apply_cross_file_sanitization;
4use crate::config::ScanPathFilter;
5use crate::error::Result;
6use crate::ir::execution_surface::ExecutionSurface;
7use crate::ir::taint_builder::build_data_surface;
8use crate::ir::*;
9use crate::parser;
10
/// MCP Server adapter.
///
/// Detects MCP servers by looking for:
/// - package.json with `@modelcontextprotocol/sdk` dependency
/// - Python files importing `mcp` or `mcp.server`
/// - mcp.json / mcp-config.json manifest
///
/// NOTE(review): the checks listed above are not implemented in this file; detection
/// is delegated to `super::mcp_metadata::metadata_root_for_scan` — confirm the list
/// against that module.
pub struct McpAdapter;
18
19impl super::Adapter for McpAdapter {
20    fn framework(&self) -> Framework {
21        Framework::Mcp
22    }
23
24    fn detect(&self, root: &Path) -> bool {
25        super::mcp_metadata::metadata_root_for_scan(root).is_some()
26    }
27
28    fn load(&self, root: &Path, ignore_tests: bool) -> Result<Vec<ScanTarget>> {
29        let filter = ScanPathFilter::for_ignore_tests(ignore_tests);
30        self.load_with_filter(root, &filter)
31    }
32
33    fn load_with_filter(&self, root: &Path, filter: &ScanPathFilter) -> Result<Vec<ScanTarget>> {
34        let metadata_root =
35            super::mcp_metadata::metadata_root_for_scan(root).unwrap_or_else(|| root.to_path_buf());
36        let name = root
37            .file_name()
38            .map(|n| n.to_string_lossy().to_string())
39            .unwrap_or_else(|| "mcp-server".into());
40
41        let mut source_files = Vec::new();
42        let mut execution = ExecutionSurface::default();
43        let mut tools = Vec::new();
44
45        // Collect source files
46        collect_source_files_with_filter(root, filter, &mut source_files)?;
47        for source_file in &source_files {
48            if matches!(
49                source_file.language,
50                Language::TypeScript | Language::JavaScript
51            ) {
52                tools.extend(extract_mcp_tools_from_source(
53                    &source_file.path,
54                    &source_file.content,
55                ));
56            }
57        }
58
59        // Phase 1: Parse each source file, collecting results for cross-file analysis.
60        let mut parsed_files: Vec<(PathBuf, parser::ParsedFile)> = Vec::new();
61        for sf in &source_files {
62            if let Some(parser) = parser::parser_for_language(sf.language) {
63                if let Ok(parsed) = parser.parse_file(&sf.path, &sf.content) {
64                    parsed_files.push((sf.path.clone(), parsed));
65                }
66            }
67        }
68
69        // Phase 2: Cross-file sanitizer-aware analysis — downgrade operations
70        // in functions that are only called with sanitized arguments.
71        apply_cross_file_sanitization(&mut parsed_files);
72
73        // Phase 3: Merge parsed results into execution surface.
74        for (_, parsed) in parsed_files {
75            execution.commands.extend(parsed.commands);
76            execution.file_operations.extend(parsed.file_operations);
77            execution
78                .network_operations
79                .extend(parsed.network_operations);
80            execution.env_accesses.extend(parsed.env_accesses);
81            execution.dynamic_exec.extend(parsed.dynamic_exec);
82        }
83
84        // Parse tool definitions from JSON if available
85        let tools_json = root.join("tools.json");
86        if tools_json.exists() && filter.allows_path(root, &tools_json) {
87            if let Ok(content) = std::fs::read_to_string(&tools_json) {
88                if let Ok(value) = serde_json::from_str::<serde_json::Value>(&content) {
89                    tools.extend(parser::json_schema::parse_tools_from_json(&value));
90                    tools = dedupe_tools_by_name(tools);
91                }
92            }
93        }
94
95        let (dependencies, provenance) = if super::mcp_metadata::same_path(root, &metadata_root) {
96            (
97                parse_dependencies(root, filter),
98                parse_provenance(root, filter),
99            )
100        } else {
101            (
102                parse_dependencies(&metadata_root, filter),
103                parse_provenance(&metadata_root, filter),
104            )
105        };
106
107        let data = build_data_surface(&tools, &execution);
108
109        Ok(vec![ScanTarget {
110            name,
111            framework: Framework::Mcp,
112            root_path: metadata_root,
113            tools,
114            execution,
115            data,
116            dependencies,
117            provenance,
118            source_files,
119        }])
120    }
121}
122
/// Report whether `path` looks like a test file or lives in a test directory.
///
/// Recognised conventions (Python, TypeScript, JavaScript, shell):
/// - any path component named `test`, `tests`, `__tests__` or `__pycache__`
/// - file names `conftest.py`, `pytest.ini`, `setup.cfg`, `jest.config.*`, `vitest.config.*`
/// - pytest naming: `test_*.py` and `*_test.py`
/// - suffixes `.test.{ts,js,tsx,jsx,py,sh}` and `.spec.{ts,js,tsx,jsx,py,sh}`
///
/// Every component of `path` is inspected, so callers that only care about the
/// location inside a project should pass a project-relative path.
pub fn is_test_file(path: &Path) -> bool {
    const TEST_DIRECTORIES: [&str; 4] = ["test", "tests", "__tests__", "__pycache__"];
    const EXACT_CONFIG_NAMES: [&str; 3] = ["conftest.py", "pytest.ini", "setup.cfg"];
    const CONFIG_PREFIXES: [&str; 2] = ["jest.config.", "vitest.config."];
    const TEST_SUFFIXES: [&str; 12] = [
        ".test.ts",
        ".test.js",
        ".test.tsx",
        ".test.jsx",
        ".test.py",
        ".test.sh",
        ".spec.ts",
        ".spec.js",
        ".spec.tsx",
        ".spec.jsx",
        ".spec.py",
        ".spec.sh",
    ];

    // A single test-directory component anywhere in the path is enough.
    let inside_test_dir = path.components().any(|component| match component {
        std::path::Component::Normal(part) => {
            let part = part.to_string_lossy();
            TEST_DIRECTORIES.iter().any(|dir| *dir == &*part)
        }
        _ => false,
    });
    if inside_test_dir {
        return true;
    }

    // Paths without a final component (e.g. `..` or empty) cannot match by name.
    let Some(raw_name) = path.file_name() else {
        return false;
    };
    let lossy_name = raw_name.to_string_lossy();
    let base: &str = &lossy_name;

    let is_test_config = EXACT_CONFIG_NAMES.iter().any(|known| *known == base)
        || CONFIG_PREFIXES.iter().any(|prefix| base.starts_with(prefix));

    let is_pytest_named =
        base.ends_with(".py") && (base.starts_with("test_") || base.ends_with("_test.py"));

    is_test_config || is_pytest_named || TEST_SUFFIXES.iter().any(|suffix| base.ends_with(suffix))
}
187
188fn extract_mcp_tools_from_source(path: &Path, content: &str) -> Vec<ToolSurface> {
189    let mut tools = Vec::new();
190    let mut offset = 0;
191
192    while let Some(relative_start) = find_next_mcp_tool_call(&content[offset..]) {
193        let call_start = offset + relative_start;
194        let Some(open_paren) = content[call_start..].find('(').map(|pos| call_start + pos) else {
195            break;
196        };
197        let args_start = open_paren + 1;
198        let Some((name, after_name)) = parse_string_literal_at(content, args_start) else {
199            offset = args_start;
200            continue;
201        };
202        let description = parse_next_string_argument(content, after_name);
203        let line = content[..call_start].lines().count() + 1;
204
205        tools.push(ToolSurface {
206            name,
207            description,
208            input_schema: None,
209            output_schema: None,
210            declared_permissions: Vec::new(),
211            defined_at: Some(source_loc(path, line)),
212        });
213
214        offset = after_name;
215    }
216
217    dedupe_tools_by_name(tools)
218}
219
/// Byte offset of the earliest `.tool(` or `.registerTool(` occurrence in
/// `content`, or `None` when neither appears.
fn find_next_mcp_tool_call(content: &str) -> Option<usize> {
    [".tool(", ".registerTool("]
        .iter()
        .filter_map(|marker| content.find(marker))
        .min()
}
228
229fn parse_next_string_argument(content: &str, offset: usize) -> Option<String> {
230    let mut index = skip_whitespace(content, offset);
231    if content[index..].starts_with(',') {
232        index += 1;
233    } else {
234        return None;
235    }
236
237    let index = skip_whitespace(content, index);
238    parse_string_literal_at(content, index).map(|(value, _)| value)
239}
240
/// Parse a JavaScript/TypeScript string literal starting at byte `offset`.
///
/// Leading whitespace is skipped. The literal may be delimited by `'`, `"` or
/// a backtick. On success returns the decoded contents and the byte offset
/// just past the closing quote; returns `None` when the next non-whitespace
/// character is not a quote or the literal is unterminated.
///
/// Escape handling: `\n`, `\r` and `\t` are decoded to their control
/// characters; any other escaped character is kept verbatim without the
/// backslash (so `\'` yields `'` and `\\` yields `\`). `\u`/`\x` sequences and
/// template-literal `${...}` interpolation are not interpreted.
///
/// # Panics
///
/// Panics if `offset` is past the end of `content` or not on a UTF-8
/// character boundary.
fn parse_string_literal_at(content: &str, offset: usize) -> Option<(String, usize)> {
    // Skip leading whitespace; the trimmed remainder is a suffix of `content`,
    // so the quote's absolute position follows from the lengths.
    let remainder = content[offset..].trim_start();
    let quote_at = content.len() - remainder.len();

    let quote = remainder.chars().next()?;
    if !matches!(quote, '\'' | '"' | '`') {
        return None;
    }

    let body_start = quote_at + quote.len_utf8();
    let mut value = String::new();
    let mut chars = content[body_start..].char_indices();

    while let Some((relative_index, ch)) = chars.next() {
        if ch == '\\' {
            // A trailing backslash means the literal never terminates.
            let (_, escaped) = chars.next()?;
            value.push(match escaped {
                'n' => '\n',
                'r' => '\r',
                't' => '\t',
                other => other,
            });
        } else if ch == quote {
            return Some((value, body_start + relative_index + quote.len_utf8()));
        } else {
            value.push(ch);
        }
    }

    None
}
269
/// Return the byte offset of the first non-whitespace character at or after
/// `offset`, or `content.len()` when only whitespace remains.
///
/// # Panics
///
/// Panics if `offset` is out of bounds or not on a character boundary.
fn skip_whitespace(content: &str, offset: usize) -> usize {
    let remaining = content[offset..].trim_start();
    // `remaining` is a suffix of `content`, so its start is `len - remaining.len()`.
    content.len() - remaining.len()
}
279
280fn dedupe_tools_by_name(tools: Vec<ToolSurface>) -> Vec<ToolSurface> {
281    let mut seen = std::collections::HashSet::new();
282    let mut deduped = Vec::new();
283    for tool in tools {
284        if seen.insert(tool.name.clone()) {
285            deduped.push(tool);
286        }
287    }
288    deduped
289}
290
291fn source_loc(file: &Path, line: usize) -> SourceLocation {
292    SourceLocation {
293        file: file.to_path_buf(),
294        line,
295        column: 0,
296        end_line: None,
297        end_column: None,
298    }
299}
300
301pub(super) fn collect_source_files_with_filter(
302    root: &Path,
303    filter: &ScanPathFilter,
304    files: &mut Vec<SourceFile>,
305) -> Result<()> {
306    let walker = ignore::WalkBuilder::new(root)
307        .hidden(true)
308        .git_ignore(true)
309        .max_depth(Some(5))
310        .build();
311
312    for entry in walker.flatten() {
313        let path = entry.path();
314        if !path.is_file() {
315            continue;
316        }
317
318        if filter.ignore_tests() && is_test_file(path) {
319            continue;
320        }
321
322        if !filter.allows_path(root, path) {
323            continue;
324        }
325
326        let ext = path
327            .extension()
328            .map(|e| e.to_string_lossy().to_string())
329            .unwrap_or_default();
330        let lang = Language::from_extension(&ext);
331
332        if matches!(lang, Language::Unknown) {
333            continue;
334        }
335
336        // Skip files larger than 1MB
337        let metadata = std::fs::metadata(path)?;
338        if metadata.len() > 1_048_576 {
339            continue;
340        }
341
342        if let Ok(content) = std::fs::read_to_string(path) {
343            let hash = format!(
344                "{:x}",
345                sha2::Digest::finalize(sha2::Sha256::new().chain_update(content.as_bytes()))
346            );
347            files.push(SourceFile {
348                path: path.to_path_buf(),
349                language: lang,
350                size_bytes: metadata.len(),
351                content_hash: hash,
352                content,
353            });
354        }
355    }
356
357    Ok(())
358}
359
360pub(super) fn parse_dependencies(
361    root: &Path,
362    filter: &ScanPathFilter,
363) -> dependency_surface::DependencySurface {
364    use crate::ir::dependency_surface::*;
365    let mut surface = DependencySurface::default();
366
367    // Parse requirements.txt as a dependency manifest (NOT a lockfile)
368    let req_file = root.join("requirements.txt");
369    if req_file.exists() && filter.allows_path(root, &req_file) {
370        if let Ok(content) = std::fs::read_to_string(&req_file) {
371            for (idx, line) in content.lines().enumerate() {
372                let line = line.trim();
373                if line.is_empty() || line.starts_with('#') || line.starts_with('-') {
374                    continue;
375                }
376                let (name, version) = if let Some(pos) = line.find("==") {
377                    (
378                        line[..pos].trim().to_string(),
379                        Some(line[pos + 2..].trim().to_string()),
380                    )
381                } else if let Some(pos) = line.find(">=") {
382                    (
383                        line[..pos].trim().to_string(),
384                        Some(line[pos..].trim().to_string()),
385                    )
386                } else {
387                    (line.to_string(), None)
388                };
389
390                surface.dependencies.push(Dependency {
391                    name,
392                    version_constraint: version,
393                    locked_version: None,
394                    locked_hash: None,
395                    registry: "pypi".into(),
396                    is_dev: false,
397                    location: Some(SourceLocation {
398                        file: req_file.clone(),
399                        line: idx + 1,
400                        column: 0,
401                        end_line: None,
402                        end_column: None,
403                    }),
404                });
405            }
406        }
407    }
408
409    // Check for actual Python lockfiles
410    for (filename, format) in [
411        ("Pipfile.lock", LockfileFormat::PipenvLock),
412        ("poetry.lock", LockfileFormat::PoetryLock),
413        ("uv.lock", LockfileFormat::UvLock),
414    ] {
415        let lock_path = root.join(filename);
416        if lock_path.exists() && filter.allows_path(root, &lock_path) {
417            surface.lockfile = Some(LockfileInfo {
418                path: lock_path,
419                format,
420                all_pinned: true,
421                all_hashed: false,
422            });
423            break;
424        }
425    }
426
427    // Parse package.json dependencies
428    let pkg_json = root.join("package.json");
429    if pkg_json.exists() && filter.allows_path(root, &pkg_json) {
430        if let Ok(content) = std::fs::read_to_string(&pkg_json) {
431            if let Ok(value) = serde_json::from_str::<serde_json::Value>(&content) {
432                for (key, is_dev) in [("dependencies", false), ("devDependencies", true)] {
433                    if let Some(deps) = value.get(key).and_then(|v| v.as_object()) {
434                        for (name, version) in deps {
435                            let line = find_json_key_line(&content, name);
436                            surface.dependencies.push(Dependency {
437                                name: name.clone(),
438                                version_constraint: version.as_str().map(|s| s.to_string()),
439                                locked_version: None,
440                                locked_hash: None,
441                                registry: "npm".into(),
442                                is_dev,
443                                location: Some(SourceLocation {
444                                    file: pkg_json.clone(),
445                                    line,
446                                    column: 0,
447                                    end_line: None,
448                                    end_column: None,
449                                }),
450                            });
451                        }
452                    }
453                }
454            }
455        }
456
457        // Check for lockfile
458        let lock = root.join("package-lock.json");
459        if lock.exists() {
460            surface.lockfile = Some(LockfileInfo {
461                path: lock,
462                format: dependency_surface::LockfileFormat::NpmLock,
463                all_pinned: true,
464                all_hashed: false,
465            });
466        }
467    }
468
469    surface
470}
471
/// Locate the first line of `content` containing `key` wrapped in double
/// quotes (e.g. `"package-name"`) and return its 1-based line number.
///
/// Returns 1 when no line contains the quoted key.
fn find_json_key_line(content: &str, key: &str) -> usize {
    let quoted_key = format!("\"{key}\"");
    content
        .lines()
        .position(|line| line.contains(&quoted_key))
        .map_or(1, |zero_based| zero_based + 1)
}
483
484pub(super) fn parse_provenance(
485    root: &Path,
486    filter: &ScanPathFilter,
487) -> provenance_surface::ProvenanceSurface {
488    let mut prov = provenance_surface::ProvenanceSurface::default();
489
490    // From package.json
491    let pkg_json = root.join("package.json");
492    if pkg_json.exists() && filter.allows_path(root, &pkg_json) {
493        if let Ok(content) = std::fs::read_to_string(&pkg_json) {
494            if let Ok(value) = serde_json::from_str::<serde_json::Value>(&content) {
495                prov.author = value
496                    .get("author")
497                    .and_then(|v| v.as_str())
498                    .map(|s| s.to_string());
499                prov.repository = value
500                    .get("repository")
501                    .and_then(|v| v.get("url").or(Some(v)))
502                    .and_then(|v| v.as_str())
503                    .map(|s| s.to_string());
504                prov.license = value
505                    .get("license")
506                    .and_then(|v| v.as_str())
507                    .map(|s| s.to_string());
508            }
509        }
510    }
511
512    // From pyproject.toml
513    let pyproject = root.join("pyproject.toml");
514    if pyproject.exists() && filter.allows_path(root, &pyproject) {
515        if let Ok(content) = std::fs::read_to_string(&pyproject) {
516            if let Ok(value) = content.parse::<toml::Value>() {
517                if let Some(project) = value.get("project") {
518                    prov.license = project
519                        .get("license")
520                        .and_then(|v| v.get("text").or(Some(v)))
521                        .and_then(|v| v.as_str())
522                        .map(|s| s.to_string());
523                    if let Some(authors) = project.get("authors").and_then(|v| v.as_array()) {
524                        if let Some(first) = authors.first() {
525                            prov.author = first
526                                .get("name")
527                                .and_then(|v| v.as_str())
528                                .map(|s| s.to_string());
529                        }
530                    }
531                }
532                if let Some(urls) = value.get("project").and_then(|p| p.get("urls")) {
533                    prov.repository = urls
534                        .get("Repository")
535                        .or(urls.get("repository"))
536                        .and_then(|v| v.as_str())
537                        .map(|s| s.to_string());
538                }
539            }
540        }
541    }
542
543    prov
544}
545
546use sha2::Digest;
547
#[cfg(test)]
mod tests {
    use super::*;

    /// Shell suffixes, the `*_test.py` convention and test directories are all
    /// classified as tests; an ordinary script is not.
    #[test]
    fn test_file_detection_covers_shell_and_suffix_python_tests() {
        let expected_tests = [
            "scripts/check.test.sh",
            "scripts/check.spec.sh",
            "scripts/import_data_test.py",
            "tests/unit.py",
        ];
        for candidate in expected_tests {
            assert!(is_test_file(Path::new(candidate)));
        }

        assert!(!is_test_file(Path::new("scripts/load.py")));
    }

    /// Both `.tool(...)` and `.registerTool(...)` registrations are picked up;
    /// only a string literal in second position counts as a description.
    #[test]
    fn extracts_typescript_mcp_server_tool_declarations() {
        let content = r#"
const server = new McpServer({ name: "demo" })

server.tool(
  'search_party',
  'Busca fuzzy por nome.',
  {},
  async () => ({ content: [] })
)

server.registerTool("create_report", { description: "Create report" }, async () => {})
"#;

        let tools = extract_mcp_tools_from_source(Path::new("src/mcp/server.ts"), content);
        assert_eq!(tools.len(), 2);

        let search = &tools[0];
        assert_eq!(search.name, "search_party");
        assert_eq!(search.description.as_deref(), Some("Busca fuzzy por nome."));
        assert_eq!(search.defined_at.as_ref().map(|loc| loc.line), Some(5));

        let report = &tools[1];
        assert_eq!(report.name, "create_report");
        assert_eq!(report.description, None);
    }
}