Skip to main content

infigraph_core/taint/
dynamic_urls.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use anyhow::Result;
5use serde::Serialize;
6
7use super::{FuncInfo, SourceCache};
8use crate::graph::GraphQuery;
9use crate::graph::GraphStore;
10use crate::routes::Route;
11
12#[derive(Debug, Clone, Serialize)]
13pub struct DynamicUrl {
14    pub symbol_id: String,
15    pub file: String,
16    pub line: u32,
17    pub url_template: String,
18    pub http_client: String,
19    pub matched_route: Option<MatchedRoute>,
20}
21
22#[derive(Debug, Clone, Serialize)]
23pub struct MatchedRoute {
24    pub handler_id: String,
25    pub method: String,
26    pub path: String,
27    pub framework: String,
28}
29
/// Substring patterns that mark a line as containing an HTTP client call.
///
/// Each entry is `(client label, patterns)`. Callers compare these against a
/// lower-cased line, so patterns that differ only in case (for example
/// `http.get(` and `http.Get(`) are indistinguishable at match time. Entry
/// order is significant wherever the first matching entry wins.
static HTTP_CLIENT_PATTERNS: &[(&str, &[&str])] = &[
    // Fetch-style calls.
    ("fetch", &["fetch(", "fetch ("]),
    // axios verb helpers and the bare `axios(...)` form.
    (
        "axios",
        &[
            "axios.get(",
            "axios.post(",
            "axios.put(",
            "axios.delete(",
            "axios.patch(",
            "axios(",
        ],
    ),
    // `requests.<verb>(...)` calls.
    (
        "requests",
        &[
            "requests.get(",
            "requests.post(",
            "requests.put(",
            "requests.delete(",
            "requests.patch(",
        ],
    ),
    // Generic `http.<verb>(...)` helpers and `HttpClient(...)` construction.
    (
        "http_client",
        &[
            "HttpClient(",
            "http.get(",
            "http.post(",
            "http.put(",
            "http.delete(",
        ],
    ),
    // `urlopen(...)`, qualified or not.
    ("urllib", &["urllib.request.urlopen(", "urlopen("]),
    // OkHttp client construction and request building.
    (
        "okhttp",
        &["OkHttpClient(", ".newCall(", "Request.Builder()"],
    ),
    // `restTemplate.*` calls.
    (
        "resttemplate",
        &[
            "restTemplate.getForObject(",
            "restTemplate.postForObject(",
            "restTemplate.exchange(",
        ],
    ),
    // `WebClient` / `webClient` calls.
    (
        "webclient",
        &["WebClient.create(", "webClient.get()", "webClient.post()"],
    ),
    // `HttpClient.*Async(...)` calls.
    (
        "httpclient_dotnet",
        &[
            "HttpClient.GetAsync(",
            "HttpClient.PostAsync(",
            "HttpClient.SendAsync(",
        ],
    ),
    // Capitalised `http.Get(` / `http.Post(` / `http.NewRequest(` calls.
    ("net_http", &["http.Get(", "http.Post(", "http.NewRequest("]),
    // reqwest entry points and the `.send().await` tail of a request chain.
    (
        "reqwest",
        &["reqwest::get(", "reqwest::Client::new(", ".send().await"],
    ),
];
94
95pub fn detect_dynamic_urls(store: &GraphStore, root: &Path) -> Result<Vec<DynamicUrl>> {
96    let _lock = store.write_lock()?;
97    let conn = store.connection()?;
98    let gq = GraphQuery::new(&conn);
99
100    // Get known routes for matching
101    let routes = crate::routes::detect_routes(&gq).unwrap_or_default();
102
103    let result = conn
104        .query("MATCH (s:Symbol) WHERE s.kind IN ['Function', 'Method'] AND s.file IS NOT NULL RETURN s.id, s.file, s.start_line, s.end_line")
105        .map_err(|e| anyhow::anyhow!("query: {e}"))?;
106
107    let mut functions: Vec<(String, String, u32, u32)> = Vec::new();
108    for row in result {
109        if row.len() < 4 {
110            continue;
111        }
112        let id = row[0].to_string();
113        let file = row[1].to_string();
114        let start: u32 = row[2].to_string().parse().unwrap_or(0);
115        let end: u32 = row[3].to_string().parse().unwrap_or(0);
116        if start > 0 && end > start {
117            functions.push((id, file, start, end));
118        }
119    }
120
121    let mut file_cache: HashMap<String, Vec<String>> = HashMap::new();
122    let mut urls = Vec::new();
123
124    for (symbol_id, file, start_line, end_line) in &functions {
125        let lines = file_cache.entry(file.clone()).or_insert_with(|| {
126            std::fs::read_to_string(root.join(file))
127                .unwrap_or_default()
128                .lines()
129                .map(String::from)
130                .collect()
131        });
132
133        let start_idx = (*start_line as usize).saturating_sub(1);
134        let end_idx = (*end_line as usize).min(lines.len());
135        if start_idx >= end_idx {
136            continue;
137        }
138
139        let func_lines = &lines[start_idx..end_idx];
140        let detected = find_urls_in_function(symbol_id, file, *start_line, func_lines, &routes);
141        urls.extend(detected);
142    }
143
144    if !urls.is_empty() {
145        write_calls_service_edges(store, &urls)?;
146    }
147
148    Ok(urls)
149}
150
151pub fn detect_dynamic_urls_with_cache(
152    store: &GraphStore,
153    functions: &[FuncInfo],
154    cache: &SourceCache,
155) -> Result<Vec<DynamicUrl>> {
156    let _lock = store.write_lock()?;
157    let conn = store.connection()?;
158    let gq = GraphQuery::new(&conn);
159    let routes = crate::routes::detect_routes(&gq).unwrap_or_default();
160
161    let mut urls = Vec::new();
162    for func in functions {
163        let lines = match cache.get(&func.file) {
164            Some(l) => l,
165            None => continue,
166        };
167        let start_idx = (func.start_line as usize).saturating_sub(1);
168        let end_idx = (func.end_line as usize).min(lines.len());
169        if start_idx >= end_idx {
170            continue;
171        }
172
173        let func_lines = &lines[start_idx..end_idx];
174        let detected =
175            find_urls_in_function(&func.id, &func.file, func.start_line, func_lines, &routes);
176        urls.extend(detected);
177    }
178
179    if !urls.is_empty() {
180        write_calls_service_edges(store, &urls)?;
181    }
182
183    Ok(urls)
184}
185
186fn find_urls_in_function(
187    symbol_id: &str,
188    file: &str,
189    base_line: u32,
190    lines: &[String],
191    routes: &[Route],
192) -> Vec<DynamicUrl> {
193    let mut urls = Vec::new();
194    let mut string_vars: HashMap<String, String> = HashMap::new();
195
196    for (offset, line) in lines.iter().enumerate() {
197        let trimmed = line.trim();
198        let lower = trimmed.to_lowercase();
199        let line_no = base_line + offset as u32;
200
201        // Track string variable assignments for constant propagation
202        if let Some((var, val)) = extract_string_assignment(trimmed) {
203            string_vars.insert(var, val);
204        }
205
206        // Check for HTTP client calls
207        for &(client, patterns) in HTTP_CLIENT_PATTERNS {
208            for &pat in patterns {
209                if lower.contains(&pat.to_lowercase()) {
210                    if let Some(url) = extract_url_from_line(trimmed, &string_vars) {
211                        let template = url_to_template(&url);
212                        let matched = match_route(&template, routes);
213
214                        urls.push(DynamicUrl {
215                            symbol_id: symbol_id.to_string(),
216                            file: file.to_string(),
217                            line: line_no,
218                            url_template: template,
219                            http_client: client.to_string(),
220                            matched_route: matched,
221                        });
222                    }
223                    break;
224                }
225            }
226        }
227    }
228
229    urls
230}
231
/// Recognises `name = "literal"` style assignments and returns
/// `(variable name, literal contents)`.
///
/// A single leading `let` / `var` / `const` / `String` / `final` keyword is
/// ignored, as is a trailing `;`. The right-hand side must be a quoted literal
/// (`"…"`, `'…'`, `` `…` ``) or an f-string (`f"…"`, `f'…'`); anything else
/// returns `None`. Comparisons (`==`, `!=`, `<=`, `>=`) and compound
/// assignments (`+=` and friends) are not treated as assignments, while `:=`
/// is.
///
/// The variable name is the last whitespace-separated token before the `=`,
/// so a type annotation written between the name and the `=` is not understood.
fn extract_string_assignment(line: &str) -> Option<(String, String)> {
    let line = line.trim();
    let stripped = ["let ", "var ", "const ", "String ", "final "]
        .iter()
        .find_map(|keyword| line.strip_prefix(*keyword))
        .unwrap_or(line);

    let eq_pos = stripped.find('=')?;
    let lhs = stripped[..eq_pos].trim_end();
    let after_eq = &stripped[eq_pos + 1..];

    // `==` is a comparison; an operator character right before `=` means a
    // comparison or compound assignment, neither of which defines the variable.
    let operator_before = lhs.ends_with(|c: char| {
        matches!(c, '!' | '<' | '>' | '+' | '-' | '*' | '/' | '%' | '&' | '|' | '^')
    });
    if after_eq.starts_with('=') || operator_before {
        return None;
    }

    // `name := "…"` leaves a trailing ':' on the left-hand side.
    let lhs = lhs.strip_suffix(':').unwrap_or(lhs);
    let var = lhs.split_whitespace().last()?;

    let rhs = after_eq.trim().trim_end_matches(';').trim_end();
    let value = string_literal_contents(rhs)?;
    Some((var.to_string(), value.to_string()))
}

/// Returns the text inside a quoted literal or f-string, or `None` if `rhs`
/// is not one. Only the outermost delimiter pair is removed.
fn string_literal_contents(rhs: &str) -> Option<&str> {
    // f-strings: drop the `f` and opening quote, then read up to the closing
    // quote (or to the end of the text when the quote is not closed here).
    if let Some(body) = rhs.strip_prefix('f') {
        if body.starts_with('"') || body.starts_with('\'') {
            let delim = body.chars().next()?;
            let inner = &body[1..];
            return Some(inner.find(delim).map_or(inner, |end| &inner[..end]));
        }
    }

    let delim = rhs
        .chars()
        .next()
        .filter(|c| matches!(c, '"' | '\'' | '`'))?;
    // The delimiter is ASCII, so byte offset 1 is a character boundary.
    rhs[1..].strip_suffix(delim)
}
261
/// Pulls a URL-looking string out of a single source line.
///
/// 1. Every stretch of text between two consecutive occurrences of the same
///    quote character (`"`, then `'`, then `` ` ``) is a candidate; the first
///    one that starts with `/` or contains a URL indicator is returned. This
///    covers plain literals, template literals and f-strings alike.
/// 2. Otherwise, if the line looks like string concatenation or formatting
///    (`+`, `format!(`, `String.format(`), the value of a known variable from
///    `vars` is returned when that variable appears in the line as a whole
///    identifier and its value contains `/` or `http`. If several qualify, the
///    one that appears first in the line wins, so the result does not depend
///    on hash-map iteration order.
///
/// Returns `None` when neither step finds anything.
fn extract_url_from_line(line: &str, vars: &HashMap<String, String>) -> Option<String> {
    const URL_INDICATORS: [&str; 6] =
        ["http://", "https://", "/api/", "/v1/", "/v2/", "/graphql"];

    fn looks_like_url(text: &str) -> bool {
        text.starts_with('/') || URL_INDICATORS.iter().any(|ind| text.contains(*ind))
    }

    // Splitting on the quote character yields the text before the first quote,
    // every between-quotes stretch, and the text after the last quote; only the
    // middle pieces are candidates.
    for delim in ['"', '\'', '`'] {
        let pieces: Vec<&str> = line.split(delim).collect();
        if pieces.len() < 3 {
            continue;
        }
        let hit = pieces[1..pieces.len() - 1]
            .iter()
            .copied()
            .find(|piece| looks_like_url(piece));
        if let Some(url) = hit {
            return Some(url.to_string());
        }
    }

    // String concatenation / formatting with known variables.
    let builds_string =
        line.contains('+') || line.contains("format!(") || line.contains("String.format(");
    if !builds_string {
        return None;
    }

    vars.iter()
        .filter(|(_, val)| val.contains('/') || val.contains("http"))
        .filter_map(|(var, val)| identifier_position(line, var).map(|pos| (pos, var, val)))
        // Leftmost use wins; at the same position prefer the longer name.
        .min_by_key(|&(pos, var, _)| (pos, std::cmp::Reverse(var.len()), var))
        .map(|(_, _, val)| val.clone())
}

/// Byte offset of the first occurrence of `ident` in `line` that is not part
/// of a longer identifier, or `None` if there is no such occurrence.
fn identifier_position(line: &str, ident: &str) -> Option<usize> {
    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_' || c == '$'
    }

    if ident.is_empty() {
        return None;
    }
    line.match_indices(ident).map(|(at, _)| at).find(|&at| {
        let before = line[..at].chars().next_back();
        let after = line[at + ident.len()..].chars().next();
        !before.map_or(false, is_ident_char) && !after.map_or(false, is_ident_char)
    })
}
321
/// Normalises an extracted URL by replacing every interpolated part with the
/// placeholder `{id}`.
///
/// Recognised forms:
/// * `{ … }` and `${ … }` — everything up to the matching closing brace is
///   one placeholder, including nested braces and any `/`, `?` or `&` inside
///   the expression.
/// * bare `$name` — runs until the next `/`, `?` or `&` (which is kept) or a
///   `}` (which is dropped).
///
/// Text outside placeholders is copied unchanged. An unterminated placeholder
/// extends to the end of the input.
fn url_to_template(url: &str) -> String {
    type Cursor<'a> = std::iter::Peekable<std::str::Chars<'a>>;

    /// Consumes up to and including the brace that closes an already-opened `{`.
    fn skip_braced(chars: &mut Cursor<'_>) {
        let mut depth = 1usize;
        for c in chars {
            match c {
                '{' => depth += 1,
                '}' => {
                    depth -= 1;
                    if depth == 0 {
                        break;
                    }
                }
                _ => {}
            }
        }
    }

    /// Consumes a bare `$name`, stopping in front of a path/query delimiter.
    fn skip_bare(chars: &mut Cursor<'_>) {
        while let Some(&c) = chars.peek() {
            match c {
                '/' | '?' | '&' => break,
                '}' => {
                    chars.next();
                    break;
                }
                _ => {
                    chars.next();
                }
            }
        }
    }

    let mut template = String::with_capacity(url.len());
    let mut chars = url.chars().peekable();

    while let Some(ch) = chars.next() {
        match ch {
            '{' => {
                skip_braced(&mut chars);
                template.push_str("{id}");
            }
            '$' => {
                if chars.peek() == Some(&'{') {
                    chars.next();
                    skip_braced(&mut chars);
                } else {
                    skip_bare(&mut chars);
                }
                template.push_str("{id}");
            }
            other => template.push(other),
        }
    }

    template
}
351
352fn match_route(template: &str, routes: &[Route]) -> Option<MatchedRoute> {
353    let template_path = template.split('?').next().unwrap_or(template);
354    let template_path = template_path.split("://").last().unwrap_or(template_path);
355    // Strip host if present
356    let template_path = if template_path.contains('/') && !template_path.starts_with('/') {
357        template_path
358            .split_once('/')
359            .map(|(_, p)| format!("/{}", p))
360            .unwrap_or_else(|| template_path.to_string())
361    } else {
362        template_path.to_string()
363    };
364
365    let template_segments: Vec<&str> = template_path.split('/').filter(|s| !s.is_empty()).collect();
366
367    for route in routes {
368        let route_segments: Vec<&str> = route.path.split('/').filter(|s| !s.is_empty()).collect();
369
370        if template_segments.len() != route_segments.len() {
371            continue;
372        }
373
374        let mut matched = true;
375        for (ts, rs) in template_segments.iter().zip(route_segments.iter()) {
376            let ts_is_param = ts.starts_with('{') || ts.starts_with(':') || ts.starts_with('<');
377            let rs_is_param = rs.starts_with('{') || rs.starts_with(':') || rs.starts_with('<');
378            if ts_is_param || rs_is_param {
379                continue; // Parameter segments always match
380            }
381            if ts.to_lowercase() != rs.to_lowercase() {
382                matched = false;
383                break;
384            }
385        }
386
387        if matched {
388            return Some(MatchedRoute {
389                handler_id: route.handler_id.clone(),
390                method: route.method.clone(),
391                path: route.path.clone(),
392                framework: route.framework.clone(),
393            });
394        }
395    }
396
397    None
398}
399
400fn write_calls_service_edges(store: &GraphStore, urls: &[DynamicUrl]) -> Result<()> {
401    let conn = store.connection()?;
402
403    conn.query("BEGIN TRANSACTION")
404        .map_err(|e| anyhow::anyhow!("begin txn: {e}"))?;
405
406    for url in urls {
407        if let Some(ref matched) = url.matched_route {
408            let src_esc = crate::escape_str(&url.symbol_id);
409            let tgt_esc = crate::escape_str(&matched.handler_id);
410            let method_esc = crate::escape_str(&matched.method);
411            let path_esc = crate::escape_str(&url.url_template);
412
413            let _ = conn.query(&format!(
414                "MATCH (s:Symbol), (t:Symbol) WHERE s.id = '{src_esc}' AND t.id = '{tgt_esc}' \
415                 CREATE (s)-[:CALLS_SERVICE {{method: '{method_esc}', path: '{path_esc}', target_service: ''}}]->(t)"
416            ));
417        }
418    }
419
420    conn.query("COMMIT")
421        .map_err(|e| anyhow::anyhow!("commit txn: {e}"))?;
422
423    Ok(())
424}
425
426pub fn format_dynamic_urls(urls: &[DynamicUrl]) -> String {
427    if urls.is_empty() {
428        return "No dynamic URL constructions detected.".to_string();
429    }
430
431    let matched_count = urls.iter().filter(|u| u.matched_route.is_some()).count();
432
433    let mut out = format!(
434        "Dynamic URLs: {} total ({} matched to routes, {} unmatched)\n\n",
435        urls.len(),
436        matched_count,
437        urls.len() - matched_count
438    );
439
440    let mut by_client: std::collections::BTreeMap<&str, Vec<&DynamicUrl>> =
441        std::collections::BTreeMap::new();
442    for u in urls {
443        by_client.entry(&u.http_client).or_default().push(u);
444    }
445
446    for (client, items) in &by_client {
447        out.push_str(&format!("## {} ({} calls)\n", client, items.len()));
448        for u in items {
449            out.push_str(&format!("  {}:{} — {}\n", u.file, u.line, u.url_template));
450            if let Some(ref m) = u.matched_route {
451                out.push_str(&format!(
452                    "    -> {} {} ({}) [{}]\n",
453                    m.method, m.path, m.handler_id, m.framework
454                ));
455            }
456        }
457        out.push('\n');
458    }
459
460    out
461}
462
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a route fixture; the `file` field is not inspected by the matcher.
    fn route(method: &str, path: &str, handler_id: &str, framework: &str) -> Route {
        Route {
            method: method.to_string(),
            path: path.to_string(),
            handler_id: handler_id.to_string(),
            file: "app.py".to_string(),
            framework: framework.to_string(),
        }
    }

    #[test]
    fn test_extract_url_from_string_literal() {
        let vars = HashMap::new();
        let line = r#"response = requests.get("https://api.example.com/api/v1/users")"#;
        assert_eq!(
            extract_url_from_line(line, &vars).as_deref(),
            Some("https://api.example.com/api/v1/users"),
            "should extract the full quoted URL"
        );
    }

    #[test]
    fn test_extract_url_template_literal() {
        let vars = HashMap::new();
        let line = "const res = fetch(`/api/users/${userId}`)";
        assert_eq!(
            extract_url_from_line(line, &vars).as_deref(),
            Some("/api/users/${userId}"),
            "should extract the raw template literal"
        );
    }

    #[test]
    fn test_extract_url_fstring() {
        let vars = HashMap::new();
        let line = r#"response = requests.get(f"/api/users/{user_id}")"#;
        assert_eq!(
            extract_url_from_line(line, &vars).as_deref(),
            Some("/api/users/{user_id}"),
            "should extract the f-string body"
        );
    }

    #[test]
    fn test_url_to_template() {
        assert_eq!(url_to_template("/api/users/${userId}"), "/api/users/{id}");
        assert_eq!(url_to_template("/api/users/{user_id}"), "/api/users/{id}");
        assert_eq!(url_to_template("/api/v1/items"), "/api/v1/items");
    }

    #[test]
    fn test_match_route_exact() {
        let routes = vec![route("GET", "/api/users", "app.py::get_users", "flask")];
        let matched = match_route("/api/users", &routes).expect("route should match");
        assert_eq!(matched.handler_id, "app.py::get_users");
        assert_eq!(matched.method, "GET");
        assert_eq!(matched.path, "/api/users");
        assert_eq!(matched.framework, "flask");
    }

    #[test]
    fn test_match_route_with_param() {
        let routes = vec![route("GET", "/api/users/:id", "app.py::get_user", "express")];
        let matched = match_route("/api/users/{id}", &routes).expect("param route should match");
        assert_eq!(matched.handler_id, "app.py::get_user");
        assert_eq!(matched.path, "/api/users/:id");
    }

    #[test]
    fn test_match_route_strips_scheme_host_and_query() {
        let routes = vec![route("GET", "/api/users", "app.py::get_users", "flask")];
        let matched = match_route("https://api.example.com/api/users?page=2", &routes)
            .expect("absolute URL should match on its path");
        assert_eq!(matched.handler_id, "app.py::get_users");
    }

    #[test]
    fn test_match_route_no_match() {
        let routes = vec![route("GET", "/api/users", "app.py::get_users", "flask")];
        assert!(match_route("/api/products", &routes).is_none());
        // A different segment count never matches.
        assert!(match_route("/api/users/1/posts", &routes).is_none());
    }

    #[test]
    fn test_extract_string_assignment() {
        let (var, val) =
            extract_string_assignment(r#"const base_url = "https://api.example.com""#).unwrap();
        assert_eq!(var, "base_url");
        assert_eq!(val, "https://api.example.com");
    }

    #[test]
    fn test_extract_string_assignment_rejects_non_literals() {
        assert!(extract_string_assignment("x = compute(a, b)").is_none());
        assert!(extract_string_assignment(r#"if a == "x""#).is_none());
    }

    #[test]
    fn test_no_url_in_plain_code() {
        let vars = HashMap::new();
        let line = "x = compute(a, b, c)";
        assert!(extract_url_from_line(line, &vars).is_none());
    }
}