// pecto_python/flow.rs

1use crate::context::AnalysisContext;
2use crate::extractors::common::*;
3use pecto_core::model::*;
4
/// Upper bound on how many nested conditional branches the AST tracer follows.
///
/// The tracer only increments its depth counter when it descends into the branch of an
/// `if` statement, so this caps the nesting of `Condition` steps in a flow.
const MAX_DEPTH: usize = 4;
6
7/// Extract request flows for Python endpoints.
8pub fn extract_flows(spec: &mut ProjectSpec, ctx: &AnalysisContext) {
9    let mut flows = Vec::new();
10
11    for cap in &spec.capabilities {
12        for endpoint in &cap.endpoints {
13            let trigger = format!("{:?} {}", endpoint.method, endpoint.path);
14            let entry_point = format!("{}#{}", cap.source, cap.name);
15
16            let Some(file) = ctx.files.iter().find(|f| f.path == cap.source) else {
17                continue;
18            };
19
20            let root = file.tree.root_node();
21            let source = file.source.as_bytes();
22            let mut steps = Vec::new();
23
24            // Security (Depends)
25            if let Some(sec) = &endpoint.security
26                && sec.authentication.is_some()
27            {
28                steps.push(FlowStep {
29                    actor: cap.name.clone(),
30                    method: "auth".to_string(),
31                    kind: FlowStepKind::SecurityGuard,
32                    description: "Depends(auth)".to_string(),
33                    condition: None,
34                    children: Vec::new(),
35                });
36            }
37
38            // Trace via AST
39            let method_steps =
40                if let Some(func_body) = find_endpoint_function_body(&root, source, endpoint) {
41                    trace_function_body(&func_body, source, 0)
42                } else {
43                    // Fallback: text-based
44                    let method_source = find_endpoint_function_text(&root, source, endpoint)
45                        .unwrap_or_else(|| file.source.clone());
46                    let mut fallback = Vec::new();
47                    trace_source_text(&method_source, &mut fallback);
48                    fallback
49                };
50            steps.extend(method_steps);
51
52            if let Some(b) = endpoint.behaviors.first() {
53                steps.push(FlowStep {
54                    actor: cap.name.clone(),
55                    method: "return".to_string(),
56                    kind: FlowStepKind::Return,
57                    description: format!("Return: {}", b.returns.status),
58                    condition: None,
59                    children: Vec::new(),
60                });
61            }
62
63            if steps.len() > 1 {
64                flows.push(RequestFlow {
65                    trigger,
66                    entry_point,
67                    steps,
68                });
69            }
70        }
71    }
72
73    spec.flows = flows;
74}
75
76// ==================== AST-based Tracing ====================
77
78/// Find the function body AST node for a Python endpoint.
79fn find_endpoint_function_body<'a>(
80    root: &'a tree_sitter::Node<'a>,
81    source: &[u8],
82    endpoint: &Endpoint,
83) -> Option<tree_sitter::Node<'a>> {
84    let method_lower = format!("{:?}", endpoint.method).to_lowercase();
85
86    for i in 0..root.named_child_count() {
87        let node = root.named_child(i).unwrap();
88        if node.kind() == "decorated_definition" {
89            let decorators = collect_decorators(&node, source);
90            for dec in &decorators {
91                let dec_method = dec.name.to_lowercase();
92                if dec_method == method_lower
93                    || dec
94                        .full_name
95                        .to_lowercase()
96                        .ends_with(&format!(".{}", method_lower))
97                {
98                    if let Some(path_arg) = dec.args.first() {
99                        let clean_path = clean_string_literal(path_arg);
100                        if !endpoint.path.ends_with(&clean_path) && !clean_path.is_empty() {
101                            continue;
102                        }
103                    }
104                    if let Some(func) = node.child_by_field_name("definition") {
105                        return func.child_by_field_name("body");
106                    }
107                }
108            }
109        }
110    }
111    None
112}
113
114fn trace_function_body(body: &tree_sitter::Node, source: &[u8], depth: usize) -> Vec<FlowStep> {
115    let mut steps = Vec::new();
116    if depth >= MAX_DEPTH {
117        return steps;
118    }
119    // Python functions are top-level, no class body context needed for most cases
120    // For class-based views, we'd need class body — but FastAPI/Flask use functions
121    trace_node_recursive(body, source, depth, &mut steps);
122    steps
123}
124
125fn trace_node_recursive(
126    node: &tree_sitter::Node,
127    source: &[u8],
128    depth: usize,
129    steps: &mut Vec<FlowStep>,
130) {
131    if depth >= MAX_DEPTH {
132        return;
133    }
134    match node.kind() {
135        "call" => {
136            // Extract function name from AST (avoids multiline arg pollution)
137            if let Some(func_node) = node.child_by_field_name("function") {
138                let func_text = node_text(&func_node, source);
139                if let Some(step) = classify_method_call(&func_text) {
140                    steps.push(step);
141                    return;
142                }
143                // Bare function call (no dot): e.g., send_email(...)
144                if !func_text.contains('.')
145                    && !func_text.is_empty()
146                    && func_text.chars().next().is_some_and(|c| c.is_lowercase())
147                    && !is_excluded_py_target(&func_text)
148                {
149                    steps.push(FlowStep {
150                        actor: "".to_string(),
151                        method: func_text.clone(),
152                        kind: FlowStepKind::ServiceCall,
153                        description: format!("Call: {}()", func_text),
154                        condition: None,
155                        children: Vec::new(),
156                    });
157                    return;
158                }
159            }
160        }
161        "raise_statement" => {
162            let text = node_text(node, source);
163            let exception = text
164                .strip_prefix("raise ")
165                .and_then(|s| s.split('(').next())
166                .unwrap_or("Exception")
167                .trim();
168            steps.push(FlowStep {
169                actor: "".to_string(),
170                method: "raise".to_string(),
171                kind: FlowStepKind::ThrowException,
172                description: format!("raise {}", exception),
173                condition: None,
174                children: Vec::new(),
175            });
176            return;
177        }
178        "if_statement" => {
179            let condition_text = node
180                .child_by_field_name("condition")
181                .map(|c| node_text(&c, source))
182                .unwrap_or_default();
183
184            let mut if_children = Vec::new();
185            if let Some(consequence) = node.child_by_field_name("consequence") {
186                trace_node_recursive(&consequence, source, depth + 1, &mut if_children);
187            }
188
189            let mut else_children = Vec::new();
190            if let Some(alternative) = node.child_by_field_name("alternative") {
191                trace_node_recursive(&alternative, source, depth + 1, &mut else_children);
192            }
193
194            if !if_children.is_empty() || !else_children.is_empty() {
195                steps.push(FlowStep {
196                    actor: "".to_string(),
197                    method: "if".to_string(),
198                    kind: FlowStepKind::Condition,
199                    description: format!("if {}", condition_text),
200                    condition: Some(condition_text),
201                    children: if_children,
202                });
203                if !else_children.is_empty() {
204                    steps.push(FlowStep {
205                        actor: "".to_string(),
206                        method: "else".to_string(),
207                        kind: FlowStepKind::Condition,
208                        description: "else".to_string(),
209                        condition: Some("else".to_string()),
210                        children: else_children,
211                    });
212                }
213                return;
214            }
215        }
216        _ => {}
217    }
218
219    for i in 0..node.child_count() {
220        let child = node.child(i).unwrap();
221        trace_node_recursive(&child, source, depth, steps);
222    }
223}
224
225fn classify_method_call(text: &str) -> Option<FlowStep> {
226    // DB writes (SQLAlchemy, Django ORM)
227    if text.contains(".save(")
228        || text.contains(".add(")
229        || text.contains(".commit(")
230        || text.contains(".bulk_create(")
231        || text.contains(".bulk_update(")
232    {
233        let target = text.split('.').next().unwrap_or("db").trim();
234        return Some(FlowStep {
235            actor: if target == "db" || target == "session" {
236                "DB".to_string()
237            } else {
238                target.to_string()
239            },
240            method: "save".to_string(),
241            kind: FlowStepKind::DbWrite,
242            description: format!("DB write: {}", target),
243            condition: None,
244            children: Vec::new(),
245        });
246    }
247
248    if text.contains(".delete(") && !text.contains("request.") {
249        let target = text.split('.').next().unwrap_or("").trim();
250        return Some(FlowStep {
251            actor: target.to_string(),
252            method: "delete".to_string(),
253            kind: FlowStepKind::DbWrite,
254            description: format!("DB delete: {}", target),
255            condition: None,
256            children: Vec::new(),
257        });
258    }
259
260    // DB reads
261    if text.contains(".query(")
262        || text.contains(".filter(")
263        || text.contains(".get(") && text.contains(".objects")
264        || text.contains(".all(")
265        || text.contains(".first(")
266        || text.contains(".execute(")
267    {
268        let target = text.split('.').next().unwrap_or("db").trim();
269        return Some(FlowStep {
270            actor: if target == "db" || target == "session" {
271                "DB".to_string()
272            } else {
273                target.to_string()
274            },
275            method: "query".to_string(),
276            kind: FlowStepKind::DbRead,
277            description: format!("DB read: {}", target),
278            condition: None,
279            children: Vec::new(),
280        });
281    }
282
283    // Events (Celery, event bus)
284    if text.contains(".publish(")
285        || text.contains("send_task(")
286        || text.contains(".delay(")
287        || text.contains(".apply_async(")
288    {
289        return Some(FlowStep {
290            actor: "EventBus".to_string(),
291            method: "publish".to_string(),
292            kind: FlowStepKind::EventPublish,
293            description: "Publish event".to_string(),
294            condition: None,
295            children: Vec::new(),
296        });
297    }
298
299    // Service calls: service.method() or self.service.method()
300    if text.contains('.') && text.contains('(') {
301        let clean = text.trim_start_matches("await ");
302        let parts: Vec<&str> = clean.splitn(2, '.').collect();
303        if parts.len() == 2 {
304            let target = parts[0].trim();
305            let rest = parts[1];
306            let method = rest.split('(').next().unwrap_or("").trim();
307
308            // self.service.method() → service.method()
309            if target == "self" && rest.contains('.') {
310                let inner: Vec<&str> = rest.splitn(2, '.').collect();
311                if inner.len() == 2 {
312                    let svc = inner[0].trim();
313                    let svc_method = inner[1].split('(').next().unwrap_or("").trim();
314                    if !svc.is_empty()
315                        && svc.chars().next().is_some_and(|c| c.is_lowercase())
316                        && !is_excluded_py_target(svc)
317                        && !is_excluded_py_method(svc_method)
318                    {
319                        return Some(FlowStep {
320                            actor: svc.to_string(),
321                            method: svc_method.to_string(),
322                            kind: FlowStepKind::ServiceCall,
323                            description: format!("Call: {}.{}()", svc, svc_method),
324                            condition: None,
325                            children: Vec::new(),
326                        });
327                    }
328                }
329            }
330
331            if !target.is_empty()
332                && target.chars().next().is_some_and(|c| c.is_lowercase())
333                && !is_excluded_py_target(target)
334                && !is_excluded_py_method(method)
335            {
336                return Some(FlowStep {
337                    actor: target.to_string(),
338                    method: method.to_string(),
339                    kind: FlowStepKind::ServiceCall,
340                    description: format!("Call: {}.{}()", target, method),
341                    condition: None,
342                    children: Vec::new(),
343                });
344            }
345        }
346    }
347
348    None
349}
350
/// Report whether `target` is a name that should never be recorded as a call target.
///
/// The list covers receiver keywords (`self`, `cls`, `super`), common builtins, logging
/// and standard-library module names, and the `request` / `response` objects. The
/// comparison is exact and case-sensitive.
fn is_excluded_py_target(target: &str) -> bool {
    const EXCLUDED_TARGETS: &[&str] = &[
        "self",
        "cls",
        "super",
        "print",
        "len",
        "range",
        "str",
        "int",
        "float",
        "bool",
        "list",
        "dict",
        "set",
        "tuple",
        "type",
        "isinstance",
        "logger",
        "logging",
        "os",
        "sys",
        "json",
        "re",
        "request",
        "response",
    ];

    EXCLUDED_TARGETS.iter().any(|&name| name == target)
}
380
/// Report whether `method` is a method name that should never be recorded as a service
/// call.
///
/// The list covers common list/dict/str methods and logger level methods. The comparison
/// is exact and case-sensitive.
fn is_excluded_py_method(method: &str) -> bool {
    const EXCLUDED_METHODS: &[&str] = &[
        "append",
        "extend",
        "insert",
        "pop",
        "remove",
        "sort",
        "reverse",
        "keys",
        "values",
        "items",
        "get",
        "update",
        "format",
        "strip",
        "split",
        "join",
        "replace",
        "lower",
        "upper",
        "startswith",
        "endswith",
        "encode",
        "decode",
        "info",
        "debug",
        "warning",
        "error",
        "exception",
        "critical",
    ];

    EXCLUDED_METHODS.iter().any(|&name| name == method)
}
415
416// ==================== Text-based Fallback ====================
417
418fn find_endpoint_function_text(
419    root: &tree_sitter::Node,
420    source: &[u8],
421    endpoint: &Endpoint,
422) -> Option<String> {
423    let method_lower = format!("{:?}", endpoint.method).to_lowercase();
424
425    for i in 0..root.named_child_count() {
426        let node = root.named_child(i).unwrap();
427        if node.kind() == "decorated_definition" {
428            let decorators = collect_decorators(&node, source);
429            for dec in &decorators {
430                let dec_method = dec.name.to_lowercase();
431                if dec_method == method_lower
432                    || dec
433                        .full_name
434                        .to_lowercase()
435                        .ends_with(&format!(".{}", method_lower))
436                {
437                    if let Some(path_arg) = dec.args.first() {
438                        let clean_path = clean_string_literal(path_arg);
439                        if !endpoint.path.ends_with(&clean_path) && !clean_path.is_empty() {
440                            continue;
441                        }
442                    }
443                    if let Some(func) = node.child_by_field_name("definition")
444                        && let Some(body) = func.child_by_field_name("body")
445                    {
446                        return Some(node_text(&body, source));
447                    }
448                }
449            }
450        }
451    }
452    None
453}
454
455fn trace_source_text(source: &str, steps: &mut Vec<FlowStep>) {
456    for line in source.lines() {
457        let trimmed = line.trim();
458        if trimmed.contains(".save(") || trimmed.contains(".add(") || trimmed.contains(".commit(") {
459            steps.push(FlowStep {
460                actor: "DB".to_string(),
461                method: "save".to_string(),
462                kind: FlowStepKind::DbWrite,
463                description: "DB write".to_string(),
464                condition: None,
465                children: Vec::new(),
466            });
467        } else if trimmed.contains(".query(")
468            || trimmed.contains(".filter(") && trimmed.contains(".all()")
469        {
470            steps.push(FlowStep {
471                actor: "DB".to_string(),
472                method: "query".to_string(),
473                kind: FlowStepKind::DbRead,
474                description: "DB query".to_string(),
475                condition: None,
476                children: Vec::new(),
477            });
478        } else if trimmed.contains("publish(") || trimmed.contains("send_task(") {
479            steps.push(FlowStep {
480                actor: "EventBus".to_string(),
481                method: "publish".to_string(),
482                kind: FlowStepKind::EventPublish,
483                description: "Publish event".to_string(),
484                condition: None,
485                children: Vec::new(),
486            });
487        }
488    }
489}