Skip to main content

codemem_engine/index/
api_surface.rs

1//! API surface detection: endpoint definitions and HTTP client calls.
2//!
3//! Post-processing pass on extracted symbols to detect REST/HTTP endpoints
4//! and client calls for cross-service linking.
5
6use crate::index::symbol::{Reference, ReferenceKind, Symbol, SymbolKind};
7use std::sync::LazyLock;
8
9// ── Precompiled regexes ─────────────────────────────────────────────────────
10
11static RE_QUOTED_STRING: LazyLock<regex::Regex> =
12    LazyLock::new(|| regex::Regex::new(r#"["']([^"']+)["']"#).unwrap());
13
14static RE_METHODS_PARAM: LazyLock<regex::Regex> =
15    LazyLock::new(|| regex::Regex::new(r#"methods\s*=\s*\[([^\]]+)\]"#).unwrap());
16
17static RE_NESTJS_METHOD: LazyLock<regex::Regex> =
18    LazyLock::new(|| regex::Regex::new(r"^@(Get|Post|Put|Delete|Patch|Head|Options)\b").unwrap());
19
20static RE_FLASK_PARAM: LazyLock<regex::Regex> =
21    LazyLock::new(|| regex::Regex::new(r"<(?:\w+:)?(\w+)>").unwrap());
22
23static RE_EXPRESS_PARAM: LazyLock<regex::Regex> =
24    LazyLock::new(|| regex::Regex::new(r":(\w+)").unwrap());
25
/// A detected API endpoint.
///
/// Every field is a plain string or integer, so the type also derives `Eq` and
/// `Hash`; this lets callers deduplicate endpoints or use them as map/set keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DetectedEndpoint {
    /// Endpoint ID: "ep:{namespace}:{method}:{path}"
    pub id: String,
    /// HTTP method (GET, POST, PUT, DELETE, PATCH, etc.) or None for catch-all.
    pub method: Option<String>,
    /// URL path pattern, normalized (e.g., "/api/users/{id}").
    pub path: String,
    /// Handler symbol qualified name.
    pub handler: String,
    /// File path of the handler.
    pub file_path: String,
    /// Line number.
    pub line: usize,
}
42
/// A detected HTTP client call.
///
/// Every field is a plain string or integer, so the type also derives `Eq` and
/// `Hash`; this lets callers deduplicate calls or use them as map/set keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DetectedClientCall {
    /// Symbol making the HTTP call.
    pub caller: String,
    /// HTTP method if detectable.
    pub method: Option<String>,
    /// URL pattern extracted from the call (may be partial/relative).
    pub url_pattern: Option<String>,
    /// The HTTP client library being used.
    pub client_library: String,
    /// File path.
    pub file_path: String,
    /// Line number.
    pub line: usize,
}
59
60/// Result of API surface detection.
61#[derive(Debug, Default)]
62pub struct ApiSurfaceResult {
63    pub endpoints: Vec<DetectedEndpoint>,
64    pub client_calls: Vec<DetectedClientCall>,
65}
66
67/// Detect API endpoints from extracted symbols.
68///
69/// Scans symbol attributes/decorators for framework-specific route patterns:
70/// - Python: `@app.route`, `@router.get`, `@api_view`, `@GetMapping` (for Django views, Flask, FastAPI)
71/// - TypeScript: `@Get`, `@Post` (NestJS), `app.get` (Express) — detected from call references
72/// - Java: `@GetMapping`, `@PostMapping`, `@RequestMapping`
73/// - Go: detected via call patterns (`http.HandleFunc`, `router.GET`, etc.)
74pub fn detect_endpoints(symbols: &[Symbol], namespace: &str) -> Vec<DetectedEndpoint> {
75    let mut endpoints = Vec::new();
76
77    for sym in symbols {
78        // Check attributes/decorators for route patterns
79        for attr in &sym.attributes {
80            if let Some(ep) = parse_route_decorator(attr, sym, namespace) {
81                endpoints.push(ep);
82            }
83        }
84
85        // Check for Django URL pattern-style views (class-based views)
86        if is_django_view_class(sym) {
87            // Django CBVs: methods like get(), post() on View subclasses
88            // The URL pattern linking happens elsewhere; here we just mark the handler
89            for method in &["get", "post", "put", "patch", "delete"] {
90                if sym.kind == SymbolKind::Method && sym.name == *method {
91                    if let Some(parent) = &sym.parent {
92                        endpoints.push(DetectedEndpoint {
93                            id: format!("ep:{namespace}:{}:view:{parent}", method.to_uppercase()),
94                            method: Some(method.to_uppercase()),
95                            path: format!("view:{parent}"), // placeholder until URL conf resolved
96                            handler: sym.qualified_name.clone(),
97                            file_path: sym.file_path.clone(),
98                            line: sym.line_start,
99                        });
100                    }
101                }
102            }
103        }
104    }
105
106    endpoints
107}
108
109/// Parse a route decorator/annotation string into an endpoint.
110///
111/// Handles patterns like:
112/// - `@app.route("/users")` or `@app.route("/users", methods=["GET", "POST"])`
113/// - `@router.get("/users/{id}")` or `@app.get("/users/<int:id>")`
114/// - `@GetMapping("/users")` or `@RequestMapping(value="/users", method=RequestMethod.GET)`
115/// - `@Get("/users")` (NestJS)
116/// - `@api_view(["GET"])` (DRF — path comes from urls.py, not decorator)
117fn parse_route_decorator(attr: &str, sym: &Symbol, namespace: &str) -> Option<DetectedEndpoint> {
118    let attr_lower = attr.to_lowercase();
119
120    // Flask/FastAPI style: @app.route("/path") or @router.get("/path")
121    if attr_lower.contains("route(")
122        || attr_lower.contains(".get(")
123        || attr_lower.contains(".post(")
124        || attr_lower.contains(".put(")
125        || attr_lower.contains(".delete(")
126        || attr_lower.contains(".patch(")
127    {
128        let method = extract_http_method_from_decorator(attr);
129        let path = extract_path_from_decorator(attr)?;
130        let normalized_path = normalize_path_pattern(&path);
131
132        return Some(DetectedEndpoint {
133            id: format!(
134                "ep:{namespace}:{}:{normalized_path}",
135                method.as_deref().unwrap_or("ANY")
136            ),
137            method,
138            path: normalized_path,
139            handler: sym.qualified_name.clone(),
140            file_path: sym.file_path.clone(),
141            line: sym.line_start,
142        });
143    }
144
145    // Spring style: @GetMapping("/path"), @PostMapping("/path"), @RequestMapping(...)
146    if attr_lower.contains("mapping(") || attr_lower.contains("mapping\"") {
147        let method = extract_spring_method(attr);
148        let path = extract_path_from_decorator(attr)?;
149        let normalized_path = normalize_path_pattern(&path);
150
151        return Some(DetectedEndpoint {
152            id: format!(
153                "ep:{namespace}:{}:{normalized_path}",
154                method.as_deref().unwrap_or("ANY")
155            ),
156            method,
157            path: normalized_path,
158            handler: sym.qualified_name.clone(),
159            file_path: sym.file_path.clone(),
160            line: sym.line_start,
161        });
162    }
163
164    // NestJS style: @Get("/path"), @Post("/path")
165    if let Some(method) = extract_nestjs_method(attr) {
166        let path = extract_path_from_decorator(attr).unwrap_or_else(|| "/".to_string());
167        let normalized_path = normalize_path_pattern(&path);
168
169        return Some(DetectedEndpoint {
170            id: format!("ep:{namespace}:{method}:{normalized_path}"),
171            method: Some(method),
172            path: normalized_path,
173            handler: sym.qualified_name.clone(),
174            file_path: sym.file_path.clone(),
175            line: sym.line_start,
176        });
177    }
178
179    None
180}
181
182/// Detect HTTP client calls from extracted references.
183///
184/// Scans call references for known HTTP client patterns:
185/// - Python: `requests.get`/`post`/..., `httpx.get`/`post`/..., `aiohttp`
186/// - TS/JS: `fetch`, `axios.get`/`post`/..., `got`
187/// - Java: `RestTemplate`, `WebClient`, `HttpClient`
188/// - Go: `http.Get`, `http.Post`, `http.NewRequest`
189pub fn detect_client_calls(references: &[Reference]) -> Vec<DetectedClientCall> {
190    let mut calls = Vec::new();
191
192    for r in references {
193        if r.kind != ReferenceKind::Call {
194            continue;
195        }
196
197        if let Some(call) = parse_client_call(&r.target_name, r) {
198            calls.push(call);
199        }
200    }
201
202    calls
203}
204
205fn parse_client_call(target: &str, reference: &Reference) -> Option<DetectedClientCall> {
206    let target_lower = target.to_lowercase();
207
208    // Python: requests.get, requests.post, httpx.get, etc.
209    if target_lower.starts_with("requests.") || target_lower.starts_with("httpx.") {
210        let parts: Vec<&str> = target.splitn(2, '.').collect();
211        let library = parts[0].to_string();
212        let method = parts.get(1).and_then(|m| http_method_from_name(m));
213
214        return Some(DetectedClientCall {
215            caller: reference.source_qualified_name.clone(),
216            method,
217            url_pattern: None, // would need string literal analysis
218            client_library: library,
219            file_path: reference.file_path.clone(),
220            line: reference.line,
221        });
222    }
223
224    // TS/JS: fetch (global function)
225    if target_lower == "fetch" {
226        return Some(DetectedClientCall {
227            caller: reference.source_qualified_name.clone(),
228            method: None, // determined by options argument
229            url_pattern: None,
230            client_library: "fetch".to_string(),
231            file_path: reference.file_path.clone(),
232            line: reference.line,
233        });
234    }
235
236    // TS/JS: axios.get, axios.post, etc.
237    if target_lower.starts_with("axios.") {
238        let method = target.split('.').nth(1).and_then(http_method_from_name);
239        return Some(DetectedClientCall {
240            caller: reference.source_qualified_name.clone(),
241            method,
242            url_pattern: None,
243            client_library: "axios".to_string(),
244            file_path: reference.file_path.clone(),
245            line: reference.line,
246        });
247    }
248
249    // Go: http.Get, http.Post, http.NewRequest
250    if target_lower.starts_with("http.")
251        && (target.contains("Get")
252            || target.contains("Post")
253            || target.contains("NewRequest")
254            || target.contains("Do"))
255    {
256        let method = if target.contains("Get") {
257            Some("GET".to_string())
258        } else if target.contains("Post") {
259            Some("POST".to_string())
260        } else {
261            None
262        };
263        return Some(DetectedClientCall {
264            caller: reference.source_qualified_name.clone(),
265            method,
266            url_pattern: None,
267            client_library: "net/http".to_string(),
268            file_path: reference.file_path.clone(),
269            line: reference.line,
270        });
271    }
272
273    // Java: RestTemplate, WebClient
274    if target_lower.contains("resttemplate")
275        || target_lower.contains("webclient")
276        || target_lower.contains("httpclient")
277    {
278        return Some(DetectedClientCall {
279            caller: reference.source_qualified_name.clone(),
280            method: None,
281            url_pattern: None,
282            client_library: target.split('.').next().unwrap_or(target).to_string(),
283            file_path: reference.file_path.clone(),
284            line: reference.line,
285        });
286    }
287
288    None
289}
290
291// ── Helper functions ──
292
293/// Extract the first quoted string from a decorator (the path argument).
294fn extract_path_from_decorator(attr: &str) -> Option<String> {
295    RE_QUOTED_STRING.captures(attr).map(|c| c[1].to_string())
296}
297
298/// Extract HTTP method from a decorator like `@app.get(...)` or `@router.post(...)`
299fn extract_http_method_from_decorator(attr: &str) -> Option<String> {
300    let attr_lower = attr.to_lowercase();
301    for method in &["get", "post", "put", "delete", "patch", "head", "options"] {
302        // Match .get( or .post( etc
303        if attr_lower.contains(&format!(".{method}(")) {
304            return Some(method.to_uppercase());
305        }
306    }
307    // @app.route with methods= parameter
308    if attr_lower.contains("route(") {
309        if let Some(methods) = extract_methods_param(attr) {
310            return methods.first().cloned();
311        }
312    }
313    None
314}
315
316/// Extract `methods=["GET", "POST"]` from a route decorator.
317fn extract_methods_param(attr: &str) -> Option<Vec<String>> {
318    let caps = RE_METHODS_PARAM.captures(attr)?;
319    let methods_str = &caps[1];
320    let methods: Vec<String> = methods_str
321        .split(',')
322        .map(|m| {
323            m.trim()
324                .trim_matches(|c| c == '"' || c == '\'')
325                .to_uppercase()
326        })
327        .filter(|m| !m.is_empty())
328        .collect();
329    if methods.is_empty() {
330        None
331    } else {
332        Some(methods)
333    }
334}
335
/// Extract the HTTP method from a Spring mapping annotation.
///
/// The annotation *name* (the identifier before the argument list, with an
/// optional leading `@` and package qualifier) decides the result:
///
/// - a name ending in `GetMapping`, `PostMapping`, `PutMapping`,
///   `DeleteMapping` or `PatchMapping` (case-insensitive) yields that verb;
/// - a name ending in `RequestMapping` yields the first verb of its
///   `method = ...` attribute, accepting `RequestMethod.GET`, a bare `GET`,
///   and brace/bracket-wrapped lists such as `{RequestMethod.GET, ...}`;
/// - anything else, or a `RequestMapping` without a recognizable `method`
///   attribute, yields `None`.
///
/// Only the name and the `method` attribute are inspected, so verb-like text
/// inside the path (e.g. `"/widgets"`, `"/budget"`) cannot be mistaken for a
/// method.
fn extract_spring_method(attr: &str) -> Option<String> {
    const SHORTCUT_ANNOTATIONS: [(&str, &str); 5] = [
        ("getmapping", "GET"),
        ("postmapping", "POST"),
        ("putmapping", "PUT"),
        ("deletemapping", "DELETE"),
        ("patchmapping", "PATCH"),
    ];
    const VERBS: [&str; 7] = ["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"];
    const METHOD_KEY: &str = "method";

    let lowered = attr.to_lowercase();

    // Split into the annotation name and everything after it (the arguments).
    let after_at = lowered.trim_start().trim_start_matches('@');
    let name_len = after_at
        .find(|c: char| !(c.is_alphanumeric() || c == '_' || c == '.'))
        .unwrap_or(after_at.len());
    let (name, args) = after_at.split_at(name_len);

    if let Some((_, verb)) = SHORTCUT_ANNOTATIONS
        .iter()
        .find(|(suffix, _)| name.ends_with(suffix))
    {
        return Some((*verb).to_string());
    }
    if !name.ends_with("requestmapping") {
        return None;
    }

    // Look for a `method` key that is directly followed by `=`; other
    // occurrences of the word (e.g. inside `RequestMethod` or a path) are skipped.
    let mut remaining = args;
    while let Some(pos) = remaining.find(METHOD_KEY) {
        remaining = &remaining[pos + METHOD_KEY.len()..];
        let Some(value) = remaining.trim_start().strip_prefix('=') else {
            continue;
        };

        // Skip list/quote openers, then read a possibly qualified constant
        // such as `requestmethod.get` and keep its last segment.
        let value = value
            .trim_start_matches(|c: char| c.is_whitespace() || matches!(c, '{' | '[' | '"' | '\''));
        let qualified_len = value
            .find(|c: char| !(c.is_ascii_alphanumeric() || c == '_' || c == '.'))
            .unwrap_or(value.len());
        let qualified = &value[..qualified_len];
        let token = qualified.rsplit('.').next().unwrap_or(qualified);

        return VERBS
            .iter()
            .find(|verb| verb.eq_ignore_ascii_case(token))
            .map(|verb| (*verb).to_string());
    }

    None
}
365
366/// Extract HTTP method from NestJS decorators.
367fn extract_nestjs_method(attr: &str) -> Option<String> {
368    // NestJS: @Get, @Post, @Put, @Delete, @Patch
369    // These are standalone decorators (not method calls on an object)
370    RE_NESTJS_METHOD.captures(attr).map(|c| c[1].to_uppercase())
371}
372
373/// Normalize a URL path pattern:
374/// - Flask: `/users/<int:id>` -> `/users/{id}`
375/// - Express: `/users/:id` -> `/users/{id}`
376/// - Spring: `/users/{id}` -> already normalized
377/// - Go: `/users/{id}` -> already normalized
378pub fn normalize_path_pattern(path: &str) -> String {
379    let mut result = path.to_string();
380
381    // Flask: <type:name> or <name> → {name}
382    result = RE_FLASK_PARAM.replace_all(&result, "{$1}").to_string();
383
384    // Express: :name → {name}
385    let express_re = &*RE_EXPRESS_PARAM;
386    result = express_re.replace_all(&result, "{$1}").to_string();
387
388    // Ensure leading slash
389    if !result.starts_with('/') {
390        result = format!("/{result}");
391    }
392
393    // Remove trailing slash (unless it's just "/")
394    if result.len() > 1 && result.ends_with('/') {
395        result.pop();
396    }
397
398    result
399}
400
401/// Check if a symbol looks like a Django class-based view.
402fn is_django_view_class(sym: &Symbol) -> bool {
403    if sym.kind != SymbolKind::Method {
404        return false;
405    }
406    // Check if parent class has View-like attributes
407    sym.parent
408        .as_ref()
409        .is_some_and(|p| p.ends_with("View") || p.ends_with("ViewSet") || p.ends_with("APIView"))
410}
411
/// Map a method name to its canonical upper-case HTTP method string.
///
/// The comparison ignores case. Recognized names are GET, POST, PUT, DELETE,
/// PATCH, HEAD and OPTIONS; anything else yields `None`.
fn http_method_from_name(name: &str) -> Option<String> {
    const VERBS: [&str; 7] = ["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"];

    VERBS
        .iter()
        .find(|verb| verb.eq_ignore_ascii_case(name))
        .map(|verb| (*verb).to_string())
}
425
426/// Match a client call URL against registered endpoints.
427///
428/// Returns the best matching endpoint with confidence.
429pub fn match_endpoint<'a>(
430    url_path: &str,
431    method: Option<&str>,
432    endpoints: &'a [DetectedEndpoint],
433) -> Option<(&'a DetectedEndpoint, f64)> {
434    let normalized = normalize_path_pattern(url_path);
435    let mut best: Option<(&DetectedEndpoint, f64)> = None;
436
437    for ep in endpoints {
438        // Base confidence from path matching
439        let base_confidence: f64 = if ep.path == normalized {
440            1.0
441        } else if paths_match_with_params(&normalized, &ep.path) {
442            0.9
443        } else if normalized.starts_with(&ep.path) || ep.path.starts_with(&normalized) {
444            0.7
445        } else {
446            continue;
447        };
448
449        let mut confidence = base_confidence;
450
451        // Method match bonus
452        if let (Some(call_method), Some(ep_method)) = (method, ep.method.as_deref()) {
453            if call_method.eq_ignore_ascii_case(ep_method) {
454                confidence += 0.05;
455            } else {
456                confidence -= 0.1;
457            }
458        }
459
460        confidence = confidence.clamp(0.0, 1.0);
461
462        if best.is_none() || confidence > best.unwrap().1 {
463            best = Some((ep, confidence));
464        }
465    }
466
467    // Only return matches above threshold
468    best.filter(|(_, c)| *c >= 0.5)
469}
470
/// Check if two paths match allowing parameter substitution.
/// e.g., "/users/123" matches "/users/{id}"
///
/// Both paths are split on `/` and compared segment by segment. They match
/// when they have the same number of segments and every pattern segment is
/// either identical to the actual segment or a `{...}` placeholder.
fn paths_match_with_params(actual: &str, pattern: &str) -> bool {
    let mut actual_segments = actual.split('/');
    let mut pattern_segments = pattern.split('/');

    // Walk both paths in lock-step; running out of one before the other means
    // the segment counts differ.
    loop {
        match (actual_segments.next(), pattern_segments.next()) {
            (None, None) => return true,
            (Some(segment), Some(expected)) => {
                let is_placeholder = expected.starts_with('{') && expected.ends_with('}');
                if segment != expected && !is_placeholder {
                    return false;
                }
            }
            _ => return false,
        }
    }
}
486
487#[cfg(test)]
488#[path = "tests/api_surface_tests.rs"]
489mod tests;