Skip to main content

cgx_engine/parsers/
php.rs

1use tree_sitter::{Node, Parser, Query, QueryCursor};
2
3use crate::parser::{EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult};
4use crate::walker::SourceFile;
5
6pub struct PhpParser {
7    language: tree_sitter::Language,
8}
9
10impl PhpParser {
11    pub fn new() -> Self {
12        Self {
13            language: tree_sitter_php::language_php(),
14        }
15    }
16}
17
18impl Default for PhpParser {
19    fn default() -> Self {
20        Self::new()
21    }
22}
23
24impl LanguageParser for PhpParser {
25    fn extensions(&self) -> &[&str] {
26        &["php"]
27    }
28
29    fn extract(&self, file: &SourceFile) -> anyhow::Result<ParseResult> {
30        let mut parser = Parser::new();
31        parser.set_language(&self.language)?;
32
33        let tree = parser
34            .parse(&file.content, None)
35            .ok_or_else(|| anyhow::anyhow!("failed to parse {}", file.relative_path))?;
36
37        let source_bytes = file.content.as_bytes();
38        let root = tree.root_node();
39        let mut nodes = Vec::new();
40        let mut edges = Vec::new();
41
42        let fp = file_node_id(&file.relative_path);
43
44        // Parse function definitions
45        if let Ok(query) = Query::new(
46            &self.language,
47            "(function_definition name: (name) @name) @fn",
48        ) {
49            extract_nodes(
50                &mut nodes,
51                &mut edges,
52                file,
53                &query,
54                root,
55                source_bytes,
56                NodeKind::Function,
57                "fn",
58                &fp,
59            );
60        }
61
62        // Parse class declarations
63        if let Ok(query) = Query::new(
64            &self.language,
65            "(class_declaration name: (name) @name) @cls",
66        ) {
67            extract_nodes(
68                &mut nodes,
69                &mut edges,
70                file,
71                &query,
72                root,
73                source_bytes,
74                NodeKind::Class,
75                "cls",
76                &fp,
77            );
78        }
79
80        // Parse interface declarations
81        if let Ok(query) = Query::new(
82            &self.language,
83            "(interface_declaration name: (name) @name) @cls",
84        ) {
85            extract_nodes(
86                &mut nodes,
87                &mut edges,
88                file,
89                &query,
90                root,
91                source_bytes,
92                NodeKind::Class,
93                "cls",
94                &fp,
95            );
96        }
97
98        // Parse method declarations
99        if let Ok(query) = Query::new(
100            &self.language,
101            "(method_declaration name: (name) @name) @fn",
102        ) {
103            extract_nodes(
104                &mut nodes,
105                &mut edges,
106                file,
107                &query,
108                root,
109                source_bytes,
110                NodeKind::Function,
111                "fn",
112                &fp,
113            );
114        }
115
116        // Parse include/require
117        extract_includes(&mut edges, root, source_bytes, &fp, file);
118
119        // Extract calls
120        extract_calls(&mut edges, root, source_bytes, file);
121
122        Ok(ParseResult {
123            nodes,
124            edges,
125            ..Default::default()
126        })
127    }
128}
129
/// Builds the graph id of a file node: `rel_path` prefixed with `file:`.
fn file_node_id(rel_path: &str) -> String {
    const PREFIX: &str = "file:";
    let mut id = String::with_capacity(PREFIX.len() + rel_path.len());
    id.push_str(PREFIX);
    id.push_str(rel_path);
    id
}
133
134#[allow(clippy::too_many_arguments)]
135fn extract_nodes(
136    nodes: &mut Vec<NodeDef>,
137    edges: &mut Vec<EdgeDef>,
138    file: &SourceFile,
139    query: &Query,
140    root: tree_sitter::Node,
141    source_bytes: &[u8],
142    kind: NodeKind,
143    prefix: &str,
144    file_id: &str,
145) {
146    let mut cursor = QueryCursor::new();
147    for m in cursor.matches(query, root, source_bytes) {
148        let Some(name_capture) = m
149            .captures
150            .iter()
151            .find(|c| query.capture_names()[c.index as usize] == "name")
152        else {
153            continue;
154        };
155
156        let name = node_text(name_capture.node, source_bytes);
157        let node_start = name_capture.node.start_position();
158
159        let body_end = m
160            .captures
161            .iter()
162            .find(|c| {
163                let cap_name = &query.capture_names()[c.index as usize];
164                *cap_name == "fn" || *cap_name == "cls"
165            })
166            .map(|c| c.node.end_position())
167            .unwrap_or_else(|| name_capture.node.end_position());
168
169        let id = format!("{}:{}:{}", prefix, file.relative_path, name);
170
171        nodes.push(NodeDef {
172            id: id.clone(),
173            kind: kind.clone(),
174            name: name.clone(),
175            path: file.relative_path.clone(),
176            line_start: node_start.row as u32 + 1,
177            line_end: body_end.row as u32 + 1,
178            ..Default::default()
179        });
180
181        edges.push(EdgeDef {
182            src: file_id.to_string(),
183            dst: id,
184            kind: EdgeKind::Exports,
185            ..Default::default()
186        });
187    }
188}
189
190fn node_text(node: tree_sitter::Node, source: &[u8]) -> String {
191    node.utf8_text(source).unwrap_or("").to_string()
192}
193
194fn extract_includes(
195    edges: &mut Vec<EdgeDef>,
196    root: tree_sitter::Node,
197    source_bytes: &[u8],
198    file_id: &str,
199    file: &SourceFile,
200) {
201    let mut cursor = root.walk();
202    traverse_includes(edges, root, source_bytes, file_id, file, &mut cursor);
203}
204
205fn traverse_includes(
206    edges: &mut Vec<EdgeDef>,
207    node: tree_sitter::Node,
208    source_bytes: &[u8],
209    file_id: &str,
210    file: &SourceFile,
211    cursor: &mut tree_sitter::TreeCursor,
212) {
213    // PHP includes: include "file.php", require "file.php", include_once, require_once
214    if node.kind() == "include_expression" || node.kind() == "require_expression" {
215        for j in 0..node.child_count() {
216            let Some(child) = node.child(j) else { continue };
217            if child.kind() == "string" {
218                let include_path = unquote_str(&source_bytes[child.byte_range()]);
219                if !include_path.is_empty() {
220                    let resolved = resolve_include_path(&file.relative_path, &include_path);
221                    if !resolved.is_empty() {
222                        edges.push(EdgeDef {
223                            src: file_id.to_string(),
224                            dst: file_node_id(&resolved),
225                            kind: EdgeKind::Imports,
226                            ..Default::default()
227                        });
228                    }
229                }
230            }
231        }
232    }
233
234    if cursor.goto_first_child() {
235        loop {
236            let child = cursor.node();
237            traverse_includes(edges, child, source_bytes, file_id, file, cursor);
238            if !cursor.goto_next_sibling() {
239                break;
240            }
241        }
242        cursor.goto_parent();
243    }
244}
245
/// Decodes `s` as UTF-8 and strips surrounding whitespace, then any leading
/// and trailing single quotes, then any leading and trailing double quotes.
///
/// Bytes that are not valid UTF-8 yield an empty string.
fn unquote_str(s: &[u8]) -> String {
    let text = match std::str::from_utf8(s) {
        Ok(valid) => valid,
        Err(_) => "",
    };
    let without_single = text.trim().trim_matches('\'');
    let without_double = without_single.trim_matches('"');
    without_double.to_owned()
}
250
/// Resolves the include path `import` against the directory of `current`.
///
/// Both arguments use `/` as separator. `.` segments are ignored and `..`
/// removes the last directory (a `..` with nothing left to remove is
/// dropped). Empty segments, as produced by doubled, leading or trailing
/// slashes, are ignored so the result never contains `//`.
///
/// Returns the `/`-joined path, which is empty when no segment remains.
fn resolve_include_path(current: &str, import: &str) -> String {
    let mut parts: Vec<&str> = current.split('/').collect();
    parts.pop(); // drop the file name, keeping only its directory

    for segment in import.split('/') {
        match segment {
            // Empty segments carry no information and would yield `a//b`.
            "" | "." => {}
            ".." => {
                parts.pop();
            }
            other => parts.push(other),
        }
    }

    parts.join("/")
}
267
268fn extract_calls(edges: &mut Vec<EdgeDef>, root: Node, source: &[u8], file: &SourceFile) {
269    let mut fn_stack: Vec<String> = Vec::new();
270    walk_for_calls(edges, root, source, file, &mut fn_stack);
271}
272
/// Reports whether `kind` names a syntax node that opens a function scope:
/// a function definition, a method declaration or an anonymous function.
fn is_fn_node(kind: &str) -> bool {
    const FN_KINDS: [&str; 3] = [
        "function_definition",
        "method_declaration",
        "anonymous_function_creation_expression",
    ];
    FN_KINDS.iter().any(|candidate| *candidate == kind)
}
279
280fn fn_name_from_node(node: Node, source: &[u8], file: &SourceFile) -> Option<String> {
281    if let Some(name_node) = node.child_by_field_name("name") {
282        let name = name_node.utf8_text(source).unwrap_or("").to_string();
283        if !name.is_empty() {
284            return Some(format!("fn:{}:{}", file.relative_path, name));
285        }
286    }
287    None
288}
289
290fn walk_for_calls(
291    edges: &mut Vec<EdgeDef>,
292    node: Node,
293    source: &[u8],
294    file: &SourceFile,
295    fn_stack: &mut Vec<String>,
296) {
297    let kind = node.kind();
298    let pushed = is_fn_node(kind);
299
300    if pushed {
301        if let Some(id) = fn_name_from_node(node, source, file) {
302            fn_stack.push(id);
303        } else {
304            fn_stack.push(String::new());
305        }
306    }
307
308    if kind == "function_call_expression" {
309        if let Some(caller_id) = fn_stack.last().filter(|s| !s.is_empty()) {
310            let callee_name = node
311                .child_by_field_name("function")
312                .and_then(|func| match func.kind() {
313                    "name" => Some(func.utf8_text(source).unwrap_or("").to_string()),
314                    "qualified_name" => {
315                        // Namespace\Class::method or Class::method
316                        Some(func.utf8_text(source).unwrap_or("").to_string())
317                    }
318                    "member_access_expression" => func
319                        .child_by_field_name("name")
320                        .map(|p| p.utf8_text(source).unwrap_or("").to_string()),
321                    _ => None,
322                })
323                .unwrap_or_default();
324
325            if !callee_name.is_empty() {
326                edges.push(EdgeDef {
327                    src: caller_id.clone(),
328                    dst: callee_name,
329                    kind: EdgeKind::Calls,
330                    confidence: 0.7,
331                    ..Default::default()
332                });
333            }
334        }
335    }
336
337    let mut cursor = node.walk();
338    if cursor.goto_first_child() {
339        loop {
340            walk_for_calls(edges, cursor.node(), source, file, fn_stack);
341            if !cursor.goto_next_sibling() {
342                break;
343            }
344        }
345    }
346
347    if pushed {
348        fn_stack.pop();
349    }
350}