1use std::path::{Path, PathBuf};
2
3use crate::analysis::cross_file::apply_cross_file_sanitization;
4use crate::config::ScanPathFilter;
5use crate::error::Result;
6use crate::ir::execution_surface::ExecutionSurface;
7use crate::ir::taint_builder::build_data_surface;
8use crate::ir::*;
9use crate::parser;
10
/// Adapter for MCP (Model Context Protocol) server projects.
///
/// Stateless unit type: all behaviour lives in its `super::Adapter`
/// implementation, which reports `Framework::Mcp` and builds a `ScanTarget`
/// from the files under a scan root.
pub struct McpAdapter;
18
19impl super::Adapter for McpAdapter {
20 fn framework(&self) -> Framework {
21 Framework::Mcp
22 }
23
24 fn detect(&self, root: &Path) -> bool {
25 super::mcp_metadata::metadata_root_for_scan(root).is_some()
26 }
27
28 fn load(&self, root: &Path, ignore_tests: bool) -> Result<Vec<ScanTarget>> {
29 let filter = ScanPathFilter::for_ignore_tests(ignore_tests);
30 self.load_with_filter(root, &filter)
31 }
32
33 fn load_with_filter(&self, root: &Path, filter: &ScanPathFilter) -> Result<Vec<ScanTarget>> {
34 let metadata_root =
35 super::mcp_metadata::metadata_root_for_scan(root).unwrap_or_else(|| root.to_path_buf());
36 let name = root
37 .file_name()
38 .map(|n| n.to_string_lossy().to_string())
39 .unwrap_or_else(|| "mcp-server".into());
40
41 let mut source_files = Vec::new();
42 let mut execution = ExecutionSurface::default();
43 let mut tools = Vec::new();
44
45 collect_source_files_with_filter(root, filter, &mut source_files)?;
47 for source_file in &source_files {
48 if matches!(
49 source_file.language,
50 Language::TypeScript | Language::JavaScript
51 ) {
52 tools.extend(extract_mcp_tools_from_source(
53 &source_file.path,
54 &source_file.content,
55 ));
56 }
57 }
58
59 let mut parsed_files: Vec<(PathBuf, parser::ParsedFile)> = Vec::new();
61 for sf in &source_files {
62 if let Some(parser) = parser::parser_for_language(sf.language) {
63 if let Ok(parsed) = parser.parse_file(&sf.path, &sf.content) {
64 parsed_files.push((sf.path.clone(), parsed));
65 }
66 }
67 }
68
69 apply_cross_file_sanitization(&mut parsed_files);
72
73 for (_, parsed) in parsed_files {
75 execution.commands.extend(parsed.commands);
76 execution.file_operations.extend(parsed.file_operations);
77 execution
78 .network_operations
79 .extend(parsed.network_operations);
80 execution.env_accesses.extend(parsed.env_accesses);
81 execution.dynamic_exec.extend(parsed.dynamic_exec);
82 }
83
84 let tools_json = root.join("tools.json");
86 if tools_json.exists() && filter.allows_path(root, &tools_json) {
87 if let Ok(content) = std::fs::read_to_string(&tools_json) {
88 if let Ok(value) = serde_json::from_str::<serde_json::Value>(&content) {
89 tools.extend(parser::json_schema::parse_tools_from_json(&value));
90 tools = dedupe_tools_by_name(tools);
91 }
92 }
93 }
94
95 let (dependencies, provenance) = if super::mcp_metadata::same_path(root, &metadata_root) {
96 (
97 parse_dependencies(root, filter),
98 parse_provenance(root, filter),
99 )
100 } else {
101 (
102 parse_dependencies(&metadata_root, filter),
103 parse_provenance(&metadata_root, filter),
104 )
105 };
106
107 let data = build_data_surface(&tools, &execution);
108
109 Ok(vec![ScanTarget {
110 name,
111 framework: Framework::Mcp,
112 root_path: metadata_root,
113 tools,
114 execution,
115 data,
116 dependencies,
117 provenance,
118 source_files,
119 }])
120 }
121}
122
/// Heuristically decides whether `path` belongs to a test suite.
///
/// A path counts as a test file when any of the following holds:
/// - one of its normal components (directories or the file name itself) is
///   `test`, `tests`, `__tests__` or `__pycache__`;
/// - its file name is `conftest.py`, `pytest.ini` or `setup.cfg`, or starts
///   with `jest.config.` / `vitest.config.`;
/// - it is a `.py` file named `test_*` or `*_test.py`;
/// - its file name ends in `.test.<ext>` or `.spec.<ext>` for one of the
///   extensions `ts`, `js`, `tsx`, `jsx`, `py`, `sh`.
///
/// Paths without a file name (for example the empty path) are not test files
/// unless a component already matched.
pub fn is_test_file(path: &Path) -> bool {
    const TEST_DIR_NAMES: &[&str] = &["test", "tests", "__tests__", "__pycache__"];
    const TEST_SUPPORT_FILES: &[&str] = &["conftest.py", "pytest.ini", "setup.cfg"];
    const RUNNER_CONFIG_PREFIXES: &[&str] = &["jest.config.", "vitest.config."];
    const TEST_SUFFIXES: &[&str] = &[
        ".test.ts",
        ".test.js",
        ".test.tsx",
        ".test.jsx",
        ".test.py",
        ".test.sh",
        ".spec.ts",
        ".spec.js",
        ".spec.tsx",
        ".spec.jsx",
        ".spec.py",
        ".spec.sh",
    ];

    let has_test_component = path.components().any(|part| match part {
        std::path::Component::Normal(os_name) => {
            let part_name = os_name.to_string_lossy();
            TEST_DIR_NAMES.iter().any(|dir| *dir == &*part_name)
        }
        _ => false,
    });
    if has_test_component {
        return true;
    }

    let Some(os_file_name) = path.file_name() else {
        return false;
    };
    let file_name = os_file_name.to_string_lossy();
    let file_name: &str = &file_name;

    let is_support_file = TEST_SUPPORT_FILES.iter().any(|known| *known == file_name);
    let is_runner_config = RUNNER_CONFIG_PREFIXES
        .iter()
        .any(|prefix| file_name.starts_with(*prefix));
    // `test_*.py` or `*_test.py`
    let is_python_test = file_name.strip_suffix(".py").map_or(false, |stem| {
        file_name.starts_with("test_") || stem.ends_with("_test")
    });
    let has_test_suffix = TEST_SUFFIXES
        .iter()
        .any(|suffix| file_name.ends_with(*suffix));

    is_support_file || is_runner_config || is_python_test || has_test_suffix
}
187
188fn extract_mcp_tools_from_source(path: &Path, content: &str) -> Vec<ToolSurface> {
189 let mut tools = Vec::new();
190 let mut offset = 0;
191
192 while let Some(relative_start) = find_next_mcp_tool_call(&content[offset..]) {
193 let call_start = offset + relative_start;
194 let Some(open_paren) = content[call_start..].find('(').map(|pos| call_start + pos) else {
195 break;
196 };
197 let args_start = open_paren + 1;
198 let Some((name, after_name)) = parse_string_literal_at(content, args_start) else {
199 offset = args_start;
200 continue;
201 };
202 let description = parse_next_string_argument(content, after_name);
203 let line = content[..call_start].lines().count() + 1;
204
205 tools.push(ToolSurface {
206 name,
207 description,
208 input_schema: None,
209 output_schema: None,
210 declared_permissions: Vec::new(),
211 defined_at: Some(source_loc(path, line)),
212 });
213
214 offset = after_name;
215 }
216
217 dedupe_tools_by_name(tools)
218}
219
/// Returns the byte offset of the earliest `.tool(` or `.registerTool(`
/// occurrence in `content`, or `None` when neither appears.
fn find_next_mcp_tool_call(content: &str) -> Option<usize> {
    const CALL_MARKERS: [&str; 2] = [".tool(", ".registerTool("];

    CALL_MARKERS
        .iter()
        .filter_map(|marker| content.find(*marker))
        .min()
}
228
229fn parse_next_string_argument(content: &str, offset: usize) -> Option<String> {
230 let mut index = skip_whitespace(content, offset);
231 if content[index..].starts_with(',') {
232 index += 1;
233 } else {
234 return None;
235 }
236
237 let index = skip_whitespace(content, index);
238 parse_string_literal_at(content, index).map(|(value, _)| value)
239}
240
/// Parses a JavaScript/TypeScript string literal located at `offset`
/// (leading whitespace is skipped first).
///
/// Accepts `'…'`, `"…"` and `` `…` `` literals. Returns the decoded contents
/// together with the byte offset just past the closing quote, or `None` when
/// the text at `offset` is not a quoted literal or the literal is never
/// closed.
///
/// Single-character escapes (`\n`, `\t`, `\r`, `\b`, `\f`, `\v`, `\0`) are
/// decoded to the characters they denote; any other escaped character
/// (quotes, backslash, …) stands for itself. `\x..` / `\u....` escapes and
/// template-literal `${…}` interpolation are not interpreted.
///
/// # Panics
/// Panics if `offset` is past the end of `content` or not on a character
/// boundary.
fn parse_string_literal_at(content: &str, offset: usize) -> Option<(String, usize)> {
    // Skip leading whitespace while keeping track of the absolute position.
    let literal = content[offset..].trim_start();
    let start = content.len() - literal.len();

    let mut chars = literal.char_indices();
    let (_, quote) = chars.next()?;
    if !matches!(quote, '\'' | '"' | '`') {
        return None;
    }

    let mut value = String::new();
    let mut escaped = false;
    for (index, ch) in chars {
        if escaped {
            // Decode the escape instead of merely dropping the backslash, so
            // `\n` yields a newline rather than the letter `n`.
            value.push(match ch {
                'n' => '\n',
                't' => '\t',
                'r' => '\r',
                'b' => '\u{0008}',
                'f' => '\u{000C}',
                'v' => '\u{000B}',
                '0' => '\0',
                other => other,
            });
            escaped = false;
        } else if ch == '\\' {
            escaped = true;
        } else if ch == quote {
            return Some((value, start + index + ch.len_utf8()));
        } else {
            value.push(ch);
        }
    }

    None
}
269
/// Returns the byte offset of the first non-whitespace character at or after
/// `offset`, or `content.len()` when only whitespace remains.
///
/// # Panics
/// Panics if `offset` is past the end of `content` or not on a character
/// boundary.
fn skip_whitespace(content: &str, offset: usize) -> usize {
    let skipped_bytes: usize = content[offset..]
        .chars()
        .take_while(|ch| ch.is_whitespace())
        .map(char::len_utf8)
        .sum();
    offset + skipped_bytes
}
279
280fn dedupe_tools_by_name(tools: Vec<ToolSurface>) -> Vec<ToolSurface> {
281 let mut seen = std::collections::HashSet::new();
282 let mut deduped = Vec::new();
283 for tool in tools {
284 if seen.insert(tool.name.clone()) {
285 deduped.push(tool);
286 }
287 }
288 deduped
289}
290
291fn source_loc(file: &Path, line: usize) -> SourceLocation {
292 SourceLocation {
293 file: file.to_path_buf(),
294 line,
295 column: 0,
296 end_line: None,
297 end_column: None,
298 }
299}
300
301pub(super) fn collect_source_files_with_filter(
302 root: &Path,
303 filter: &ScanPathFilter,
304 files: &mut Vec<SourceFile>,
305) -> Result<()> {
306 let walker = ignore::WalkBuilder::new(root)
307 .hidden(true)
308 .git_ignore(true)
309 .max_depth(Some(5))
310 .build();
311
312 for entry in walker.flatten() {
313 let path = entry.path();
314 if !path.is_file() {
315 continue;
316 }
317
318 if filter.ignore_tests() && is_test_file(path) {
319 continue;
320 }
321
322 if !filter.allows_path(root, path) {
323 continue;
324 }
325
326 let ext = path
327 .extension()
328 .map(|e| e.to_string_lossy().to_string())
329 .unwrap_or_default();
330 let lang = Language::from_extension(&ext);
331
332 if matches!(lang, Language::Unknown) {
333 continue;
334 }
335
336 let metadata = std::fs::metadata(path)?;
338 if metadata.len() > 1_048_576 {
339 continue;
340 }
341
342 if let Ok(content) = std::fs::read_to_string(path) {
343 let hash = format!(
344 "{:x}",
345 sha2::Digest::finalize(sha2::Sha256::new().chain_update(content.as_bytes()))
346 );
347 files.push(SourceFile {
348 path: path.to_path_buf(),
349 language: lang,
350 size_bytes: metadata.len(),
351 content_hash: hash,
352 content,
353 });
354 }
355 }
356
357 Ok(())
358}
359
360pub(super) fn parse_dependencies(
361 root: &Path,
362 filter: &ScanPathFilter,
363) -> dependency_surface::DependencySurface {
364 use crate::ir::dependency_surface::*;
365 let mut surface = DependencySurface::default();
366
367 let req_file = root.join("requirements.txt");
369 if req_file.exists() && filter.allows_path(root, &req_file) {
370 if let Ok(content) = std::fs::read_to_string(&req_file) {
371 for (idx, line) in content.lines().enumerate() {
372 let line = line.trim();
373 if line.is_empty() || line.starts_with('#') || line.starts_with('-') {
374 continue;
375 }
376 let (name, version) = if let Some(pos) = line.find("==") {
377 (
378 line[..pos].trim().to_string(),
379 Some(line[pos + 2..].trim().to_string()),
380 )
381 } else if let Some(pos) = line.find(">=") {
382 (
383 line[..pos].trim().to_string(),
384 Some(line[pos..].trim().to_string()),
385 )
386 } else {
387 (line.to_string(), None)
388 };
389
390 surface.dependencies.push(Dependency {
391 name,
392 version_constraint: version,
393 locked_version: None,
394 locked_hash: None,
395 registry: "pypi".into(),
396 is_dev: false,
397 location: Some(SourceLocation {
398 file: req_file.clone(),
399 line: idx + 1,
400 column: 0,
401 end_line: None,
402 end_column: None,
403 }),
404 });
405 }
406 }
407 }
408
409 for (filename, format) in [
411 ("Pipfile.lock", LockfileFormat::PipenvLock),
412 ("poetry.lock", LockfileFormat::PoetryLock),
413 ("uv.lock", LockfileFormat::UvLock),
414 ] {
415 let lock_path = root.join(filename);
416 if lock_path.exists() && filter.allows_path(root, &lock_path) {
417 surface.lockfile = Some(LockfileInfo {
418 path: lock_path,
419 format,
420 all_pinned: true,
421 all_hashed: false,
422 });
423 break;
424 }
425 }
426
427 let pkg_json = root.join("package.json");
429 if pkg_json.exists() && filter.allows_path(root, &pkg_json) {
430 if let Ok(content) = std::fs::read_to_string(&pkg_json) {
431 if let Ok(value) = serde_json::from_str::<serde_json::Value>(&content) {
432 for (key, is_dev) in [("dependencies", false), ("devDependencies", true)] {
433 if let Some(deps) = value.get(key).and_then(|v| v.as_object()) {
434 for (name, version) in deps {
435 let line = find_json_key_line(&content, name);
436 surface.dependencies.push(Dependency {
437 name: name.clone(),
438 version_constraint: version.as_str().map(|s| s.to_string()),
439 locked_version: None,
440 locked_hash: None,
441 registry: "npm".into(),
442 is_dev,
443 location: Some(SourceLocation {
444 file: pkg_json.clone(),
445 line,
446 column: 0,
447 end_line: None,
448 end_column: None,
449 }),
450 });
451 }
452 }
453 }
454 }
455 }
456
457 let lock = root.join("package-lock.json");
459 if lock.exists() {
460 surface.lockfile = Some(LockfileInfo {
461 path: lock,
462 format: dependency_surface::LockfileFormat::NpmLock,
463 all_pinned: true,
464 all_hashed: false,
465 });
466 }
467 }
468
469 surface
470}
471
/// Returns the 1-based number of the first line of `content` that contains
/// `key` wrapped in double quotes, or `1` when no line does.
///
/// This is a plain text search, so the match may be any occurrence of the
/// quoted key, not necessarily the intended JSON object member.
fn find_json_key_line(content: &str, key: &str) -> usize {
    let quoted_key = format!("\"{}\"", key);
    content
        .lines()
        .position(|text| text.contains(quoted_key.as_str()))
        .map_or(1, |zero_based| zero_based + 1)
}
483
484pub(super) fn parse_provenance(
485 root: &Path,
486 filter: &ScanPathFilter,
487) -> provenance_surface::ProvenanceSurface {
488 let mut prov = provenance_surface::ProvenanceSurface::default();
489
490 let pkg_json = root.join("package.json");
492 if pkg_json.exists() && filter.allows_path(root, &pkg_json) {
493 if let Ok(content) = std::fs::read_to_string(&pkg_json) {
494 if let Ok(value) = serde_json::from_str::<serde_json::Value>(&content) {
495 prov.author = value
496 .get("author")
497 .and_then(|v| v.as_str())
498 .map(|s| s.to_string());
499 prov.repository = value
500 .get("repository")
501 .and_then(|v| v.get("url").or(Some(v)))
502 .and_then(|v| v.as_str())
503 .map(|s| s.to_string());
504 prov.license = value
505 .get("license")
506 .and_then(|v| v.as_str())
507 .map(|s| s.to_string());
508 }
509 }
510 }
511
512 let pyproject = root.join("pyproject.toml");
514 if pyproject.exists() && filter.allows_path(root, &pyproject) {
515 if let Ok(content) = std::fs::read_to_string(&pyproject) {
516 if let Ok(value) = content.parse::<toml::Value>() {
517 if let Some(project) = value.get("project") {
518 prov.license = project
519 .get("license")
520 .and_then(|v| v.get("text").or(Some(v)))
521 .and_then(|v| v.as_str())
522 .map(|s| s.to_string());
523 if let Some(authors) = project.get("authors").and_then(|v| v.as_array()) {
524 if let Some(first) = authors.first() {
525 prov.author = first
526 .get("name")
527 .and_then(|v| v.as_str())
528 .map(|s| s.to_string());
529 }
530 }
531 }
532 if let Some(urls) = value.get("project").and_then(|p| p.get("urls")) {
533 prov.repository = urls
534 .get("Repository")
535 .or(urls.get("repository"))
536 .and_then(|v| v.as_str())
537 .map(|s| s.to_string());
538 }
539 }
540 }
541 }
542
543 prov
544}
545
546use sha2::Digest;
547
#[cfg(test)]
mod tests {
    use super::*;

    /// `is_test_file` recognises shell `.test`/`.spec` scripts, `*_test.py`
    /// modules and anything under a `tests` directory, but not ordinary scripts.
    #[test]
    fn test_file_detection_covers_shell_and_suffix_python_tests() {
        let expectations = [
            ("scripts/check.test.sh", true),
            ("scripts/check.spec.sh", true),
            ("scripts/import_data_test.py", true),
            ("tests/unit.py", true),
            ("scripts/load.py", false),
        ];

        for (candidate, expected) in expectations.iter() {
            assert_eq!(
                is_test_file(Path::new(candidate)),
                *expected,
                "unexpected classification for {}",
                candidate
            );
        }
    }

    /// Both `.tool(...)` and `.registerTool(...)` declarations are extracted;
    /// only a string-literal second argument is taken as the description.
    #[test]
    fn extracts_typescript_mcp_server_tool_declarations() {
        let source = r#"
const server = new McpServer({ name: "demo" })

server.tool(
    'search_party',
    'Busca fuzzy por nome.',
    {},
    async () => ({ content: [] })
)

server.registerTool("create_report", { description: "Create report" }, async () => {})
"#;

        let tools = extract_mcp_tools_from_source(Path::new("src/mcp/server.ts"), source);
        assert_eq!(tools.len(), 2);
        let (search, report) = (&tools[0], &tools[1]);

        assert_eq!(search.name, "search_party");
        assert_eq!(
            search.description.as_deref(),
            Some("Busca fuzzy por nome.")
        );
        // Pins the extractor's current line arithmetic: the text before
        // `.tool(` spans four lines (including the partial `server` line) and
        // the extractor adds one.
        assert_eq!(search.defined_at.as_ref().map(|loc| loc.line), Some(5));

        assert_eq!(report.name, "create_report");
        // The second argument is an object, not a string literal.
        assert_eq!(report.description, None);
    }
}