Skip to main content

cgx_engine/parsers/
php.rs

1use tree_sitter::{Parser, Query, QueryCursor, Node};
2
3use crate::parser::{EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult};
4use crate::walker::SourceFile;
5
6pub struct PhpParser {
7    language: tree_sitter::Language,
8}
9
10impl PhpParser {
11    pub fn new() -> Self {
12        Self {
13            language: tree_sitter_php::language_php(),
14        }
15    }
16}
17
18impl Default for PhpParser {
19    fn default() -> Self {
20        Self::new()
21    }
22}
23
24impl LanguageParser for PhpParser {
25    fn extensions(&self) -> &[&str] {
26        &["php"]
27    }
28
29    fn extract(&self, file: &SourceFile) -> anyhow::Result<ParseResult> {
30        let mut parser = Parser::new();
31        parser.set_language(&self.language)?;
32
33        let tree = parser.parse(&file.content, None).ok_or_else(|| {
34            anyhow::anyhow!("failed to parse {}", file.relative_path)
35        })?;
36
37        let source_bytes = file.content.as_bytes();
38        let root = tree.root_node();
39        let mut nodes = Vec::new();
40        let mut edges = Vec::new();
41
42        let fp = file_node_id(&file.relative_path);
43
44        // Parse function definitions
45        if let Ok(query) = Query::new(
46            &self.language,
47            "(function_definition name: (name) @name) @fn",
48        ) {
49            extract_nodes(
50                &mut nodes, &mut edges, file, &query, root, source_bytes,
51                NodeKind::Function, "fn", &fp,
52            );
53        }
54
55        // Parse class declarations
56        if let Ok(query) = Query::new(
57            &self.language,
58            "(class_declaration name: (name) @name) @cls",
59        ) {
60            extract_nodes(
61                &mut nodes, &mut edges, file, &query, root, source_bytes,
62                NodeKind::Class, "cls", &fp,
63            );
64        }
65
66        // Parse interface declarations
67        if let Ok(query) = Query::new(
68            &self.language,
69            "(interface_declaration name: (name) @name) @cls",
70        ) {
71            extract_nodes(
72                &mut nodes, &mut edges, file, &query, root, source_bytes,
73                NodeKind::Class, "cls", &fp,
74            );
75        }
76
77        // Parse method declarations
78        if let Ok(query) = Query::new(
79            &self.language,
80            "(method_declaration name: (name) @name) @fn",
81        ) {
82            extract_nodes(
83                &mut nodes, &mut edges, file, &query, root, source_bytes,
84                NodeKind::Function, "fn", &fp,
85            );
86        }
87
88        // Parse include/require
89        extract_includes(&mut edges, root, source_bytes, &fp, file);
90
91        // Extract calls
92        extract_calls(&mut edges, root, source_bytes, file);
93
94        Ok(ParseResult { nodes, edges })
95    }
96}
97
/// Builds the graph node id of a file: the literal prefix `file:` followed by
/// the given relative path, unchanged.
fn file_node_id(rel_path: &str) -> String {
    let mut id = String::from("file:");
    id.push_str(rel_path);
    id
}
101
102#[allow(clippy::too_many_arguments)]
103fn extract_nodes(
104    nodes: &mut Vec<NodeDef>,
105    edges: &mut Vec<EdgeDef>,
106    file: &SourceFile,
107    query: &Query,
108    root: tree_sitter::Node,
109    source_bytes: &[u8],
110    kind: NodeKind,
111    prefix: &str,
112    file_id: &str,
113) {
114    let mut cursor = QueryCursor::new();
115    for m in cursor.matches(query, root, source_bytes) {
116        let Some(name_capture) = m
117            .captures
118            .iter()
119            .find(|c| query.capture_names()[c.index as usize] == "name")
120        else {
121            continue;
122        };
123
124        let name = node_text(name_capture.node, source_bytes);
125        let node_start = name_capture.node.start_position();
126
127        let body_end = m
128            .captures
129            .iter()
130            .find(|c| {
131                let cap_name = &query.capture_names()[c.index as usize];
132                *cap_name == "fn" || *cap_name == "cls"
133            })
134            .map(|c| c.node.end_position())
135            .unwrap_or_else(|| name_capture.node.end_position());
136
137        let id = format!("{}:{}:{}", prefix, file.relative_path, name);
138
139        nodes.push(NodeDef {
140            id: id.clone(),
141            kind: kind.clone(),
142            name: name.clone(),
143            path: file.relative_path.clone(),
144            line_start: node_start.row as u32 + 1,
145            line_end: body_end.row as u32 + 1,
146            ..Default::default()
147        });
148
149        edges.push(EdgeDef {
150            src: file_id.to_string(),
151            dst: id,
152            kind: EdgeKind::Exports,
153            ..Default::default()
154        });
155    }
156}
157
158fn node_text(node: tree_sitter::Node, source: &[u8]) -> String {
159    node.utf8_text(source).unwrap_or("").to_string()
160}
161
162fn extract_includes(
163    edges: &mut Vec<EdgeDef>,
164    root: tree_sitter::Node,
165    source_bytes: &[u8],
166    file_id: &str,
167    file: &SourceFile,
168) {
169    let mut cursor = root.walk();
170    traverse_includes(edges, root, source_bytes, file_id, file, &mut cursor);
171}
172
173fn traverse_includes(
174    edges: &mut Vec<EdgeDef>,
175    node: tree_sitter::Node,
176    source_bytes: &[u8],
177    file_id: &str,
178    file: &SourceFile,
179    cursor: &mut tree_sitter::TreeCursor,
180) {
181    // PHP includes: include "file.php", require "file.php", include_once, require_once
182    if node.kind() == "include_expression" || node.kind() == "require_expression" {
183        for j in 0..node.child_count() {
184            let Some(child) = node.child(j) else { continue };
185            if child.kind() == "string" {
186                let include_path = unquote_str(&source_bytes[child.byte_range()]);
187                if !include_path.is_empty() {
188                    let resolved = resolve_include_path(&file.relative_path, &include_path);
189                    if !resolved.is_empty() {
190                        edges.push(EdgeDef {
191                            src: file_id.to_string(),
192                            dst: file_node_id(&resolved),
193                            kind: EdgeKind::Imports,
194                            ..Default::default()
195                        });
196                    }
197                }
198            }
199        }
200    }
201
202    if cursor.goto_first_child() {
203        loop {
204            let child = cursor.node();
205            traverse_includes(edges, child, source_bytes, file_id, file, cursor);
206            if !cursor.goto_next_sibling() {
207                break;
208            }
209        }
210        cursor.goto_parent();
211    }
212}
213
/// Decodes `s` as UTF-8 (invalid input is treated as empty), trims surrounding
/// whitespace, then strips all leading/trailing single quotes followed by all
/// leading/trailing double quotes.
fn unquote_str(s: &[u8]) -> String {
    let text = match std::str::from_utf8(s) {
        Ok(valid) => valid,
        Err(_) => "",
    };
    let without_single = text.trim().trim_matches('\'');
    let without_double = without_single.trim_matches('"');
    without_double.to_owned()
}
218
/// Resolves `import` against the directory of `current` and returns the
/// normalised, `/`-separated path.
///
/// * `current` – path of the including file; its last component (the file
///   name) is dropped to obtain the base directory.
/// * `import` – path literal from the include statement.
///
/// `.` segments are ignored and `..` removes the last accumulated component
/// (it is a no-op once nothing is left). Empty segments, as produced by
/// duplicate, leading or trailing slashes, are ignored as well so the result
/// never contains `//`. Note that this means a leading `/` in `import` is not
/// treated as filesystem-absolute; the path is still resolved relative to the
/// including file's directory.
///
/// Returns an empty string when nothing remains after normalisation.
fn resolve_include_path(current: &str, import: &str) -> String {
    let mut parts: Vec<&str> = current.split('/').collect();
    parts.pop(); // drop the file name, keep its directory

    for segment in import.split('/') {
        match segment {
            // Empty segments come from "a//b", "/a" or "a/" and carry no meaning.
            "" | "." => {}
            ".." => {
                parts.pop();
            }
            _ => parts.push(segment),
        }
    }

    parts.join("/")
}
235
236fn extract_calls(edges: &mut Vec<EdgeDef>, root: Node, source: &[u8], file: &SourceFile) {
237    let mut fn_stack: Vec<String> = Vec::new();
238    walk_for_calls(edges, root, source, file, &mut fn_stack);
239}
240
/// Reports whether `kind` is one of the node kinds that open a new function
/// scope for call attribution: a function definition, a method declaration or
/// an anonymous function.
fn is_fn_node(kind: &str) -> bool {
    const FN_KINDS: [&str; 3] = [
        "function_definition",
        "method_declaration",
        "anonymous_function_creation_expression",
    ];
    FN_KINDS.iter().any(|candidate| *candidate == kind)
}
244
245fn fn_name_from_node(node: Node, source: &[u8], file: &SourceFile) -> Option<String> {
246    if let Some(name_node) = node.child_by_field_name("name") {
247        let name = name_node.utf8_text(source).unwrap_or("").to_string();
248        if !name.is_empty() {
249            return Some(format!("fn:{}:{}", file.relative_path, name));
250        }
251    }
252    None
253}
254
255fn walk_for_calls(
256    edges: &mut Vec<EdgeDef>,
257    node: Node,
258    source: &[u8],
259    file: &SourceFile,
260    fn_stack: &mut Vec<String>,
261) {
262    let kind = node.kind();
263    let pushed = is_fn_node(kind);
264
265    if pushed {
266        if let Some(id) = fn_name_from_node(node, source, file) {
267            fn_stack.push(id);
268        } else {
269            fn_stack.push(String::new());
270        }
271    }
272
273    if kind == "function_call_expression" {
274        if let Some(caller_id) = fn_stack.last().filter(|s| !s.is_empty()) {
275            let callee_name = node
276                .child_by_field_name("function")
277                .and_then(|func| match func.kind() {
278                    "name" => Some(func.utf8_text(source).unwrap_or("").to_string()),
279                    "qualified_name" => {
280                        // Namespace\Class::method or Class::method
281                        Some(func.utf8_text(source).unwrap_or("").to_string())
282                    }
283                    "member_access_expression" => func
284                        .child_by_field_name("name")
285                        .map(|p| p.utf8_text(source).unwrap_or("").to_string()),
286                    _ => None,
287                })
288                .unwrap_or_default();
289
290            if !callee_name.is_empty() {
291                edges.push(EdgeDef {
292                    src: caller_id.clone(),
293                    dst: callee_name,
294                    kind: EdgeKind::Calls,
295                    confidence: 0.7,
296                    ..Default::default()
297                });
298            }
299        }
300    }
301
302    let mut cursor = node.walk();
303    if cursor.goto_first_child() {
304        loop {
305            walk_for_calls(edges, cursor.node(), source, file, fn_stack);
306            if !cursor.goto_next_sibling() {
307                break;
308            }
309        }
310    }
311
312    if pushed {
313        fn_stack.pop();
314    }
315}