// cgx_engine/parsers/php.rs

1use tree_sitter::{Node, Parser, Query, QueryCursor};
2
3use crate::parser::{
4    collect_doc_block_above, meta_set, EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind,
5    ParseResult,
6};
7use crate::walker::SourceFile;
8
9pub struct PhpParser {
10    language: tree_sitter::Language,
11}
12
13impl PhpParser {
14    pub fn new() -> Self {
15        Self {
16            language: tree_sitter_php::language_php(),
17        }
18    }
19}
20
21impl Default for PhpParser {
22    fn default() -> Self {
23        Self::new()
24    }
25}
26
27impl LanguageParser for PhpParser {
28    fn extensions(&self) -> &[&str] {
29        &["php"]
30    }
31
32    fn extract(&self, file: &SourceFile) -> anyhow::Result<ParseResult> {
33        let mut parser = Parser::new();
34        parser.set_language(&self.language)?;
35
36        let tree = parser
37            .parse(&file.content, None)
38            .ok_or_else(|| anyhow::anyhow!("failed to parse {}", file.relative_path))?;
39
40        let source_bytes = file.content.as_bytes();
41        let root = tree.root_node();
42        let mut nodes = Vec::new();
43        let mut edges = Vec::new();
44
45        let fp = file_node_id(&file.relative_path);
46
47        // Parse function definitions
48        if let Ok(query) = Query::new(
49            &self.language,
50            "(function_definition name: (name) @name) @fn",
51        ) {
52            extract_nodes(
53                &mut nodes,
54                &mut edges,
55                file,
56                &query,
57                root,
58                source_bytes,
59                NodeKind::Function,
60                "fn",
61                &fp,
62            );
63        }
64
65        // Parse class declarations
66        if let Ok(query) = Query::new(
67            &self.language,
68            "(class_declaration name: (name) @name) @cls",
69        ) {
70            extract_nodes(
71                &mut nodes,
72                &mut edges,
73                file,
74                &query,
75                root,
76                source_bytes,
77                NodeKind::Class,
78                "cls",
79                &fp,
80            );
81        }
82
83        // Parse interface declarations
84        if let Ok(query) = Query::new(
85            &self.language,
86            "(interface_declaration name: (name) @name) @cls",
87        ) {
88            extract_nodes(
89                &mut nodes,
90                &mut edges,
91                file,
92                &query,
93                root,
94                source_bytes,
95                NodeKind::Class,
96                "cls",
97                &fp,
98            );
99        }
100
101        // Parse method declarations
102        if let Ok(query) = Query::new(
103            &self.language,
104            "(method_declaration name: (name) @name) @fn",
105        ) {
106            extract_nodes(
107                &mut nodes,
108                &mut edges,
109                file,
110                &query,
111                root,
112                source_bytes,
113                NodeKind::Function,
114                "fn",
115                &fp,
116            );
117        }
118
119        // Parse include/require
120        extract_includes(&mut edges, root, source_bytes, &fp, file);
121
122        // Extract calls
123        extract_calls(&mut edges, root, source_bytes, file);
124
125        Ok(ParseResult {
126            nodes,
127            edges,
128            ..Default::default()
129        })
130    }
131}
132
/// Builds the graph id of a file node: the relative path prefixed with `file:`.
fn file_node_id(rel_path: &str) -> String {
    format!("file:{rel_path}")
}
136
137#[allow(clippy::too_many_arguments)]
138fn extract_nodes(
139    nodes: &mut Vec<NodeDef>,
140    edges: &mut Vec<EdgeDef>,
141    file: &SourceFile,
142    query: &Query,
143    root: tree_sitter::Node,
144    source_bytes: &[u8],
145    kind: NodeKind,
146    prefix: &str,
147    file_id: &str,
148) {
149    let mut cursor = QueryCursor::new();
150    for m in cursor.matches(query, root, source_bytes) {
151        let Some(name_capture) = m
152            .captures
153            .iter()
154            .find(|c| query.capture_names()[c.index as usize] == "name")
155        else {
156            continue;
157        };
158
159        let name = node_text(name_capture.node, source_bytes);
160        let node_start = name_capture.node.start_position();
161
162        let item_node = m
163            .captures
164            .iter()
165            .find(|c| {
166                let cap_name = &query.capture_names()[c.index as usize];
167                *cap_name == "fn" || *cap_name == "cls"
168            })
169            .map(|c| c.node);
170        let body_end = item_node
171            .map(|n| n.end_position())
172            .unwrap_or_else(|| name_capture.node.end_position());
173
174        let id = format!("{}:{}:{}", prefix, file.relative_path, name);
175
176        let doc_comment = item_node
177            .and_then(|n| collect_doc_block_above(n, source_bytes, is_phpdoc_comment))
178            .map(strip_phpdoc_markers);
179
180        let mut def = NodeDef {
181            id: id.clone(),
182            kind: kind.clone(),
183            name: name.clone(),
184            path: file.relative_path.clone(),
185            line_start: node_start.row as u32 + 1,
186            line_end: body_end.row as u32 + 1,
187            ..Default::default()
188        };
189        if let Some(doc) = doc_comment {
190            meta_set(&mut def, "doc_comment", serde_json::Value::String(doc));
191        }
192        nodes.push(def);
193
194        edges.push(EdgeDef {
195            src: file_id.to_string(),
196            dst: id,
197            kind: EdgeKind::Exports,
198            ..Default::default()
199        });
200    }
201}
202
/// Returns `true` when `text`, ignoring leading whitespace, opens with the
/// PHPDoc marker `/**`.
fn is_phpdoc_comment(text: &str) -> bool {
    text.trim_start().strip_prefix("/**").is_some()
}
206
/// Removes PHPDoc comment decoration from `raw` and returns the plain text.
///
/// Each line is trimmed and then cleaned as follows:
/// * an opening line has its `/**` marker (and a trailing `*/`, for one-line
///   comments) removed;
/// * a line that starts with `*/` becomes empty;
/// * any other line loses a trailing `*/` and one leading `*`, so a closer
///   that shares a line with text (`* last line */`) or is written `**/` does
///   not leak into the result.
///
/// Lines are re-joined with `\n` and the overall result is trimmed.
fn strip_phpdoc_markers(raw: String) -> String {
    let cleaned: Vec<&str> = raw
        .lines()
        .map(|line| {
            let line = line.trim();
            if line.starts_with("/**") {
                return line
                    .trim_start_matches("/**")
                    .trim_end_matches("*/")
                    .trim();
            }
            if line.starts_with("*/") {
                return "";
            }
            // Drop the terminator first so `**/` and `* text */` are handled
            // before the leading gutter star is removed.
            let line = line.strip_suffix("*/").unwrap_or(line);
            let line = line.strip_prefix('*').unwrap_or(line);
            line.trim()
        })
        .collect();

    cleaned.join("\n").trim().to_string()
}
227
228fn node_text(node: tree_sitter::Node, source: &[u8]) -> String {
229    node.utf8_text(source).unwrap_or("").to_string()
230}
231
232fn extract_includes(
233    edges: &mut Vec<EdgeDef>,
234    root: tree_sitter::Node,
235    source_bytes: &[u8],
236    file_id: &str,
237    file: &SourceFile,
238) {
239    let mut cursor = root.walk();
240    traverse_includes(edges, root, source_bytes, file_id, file, &mut cursor);
241}
242
243fn traverse_includes(
244    edges: &mut Vec<EdgeDef>,
245    node: tree_sitter::Node,
246    source_bytes: &[u8],
247    file_id: &str,
248    file: &SourceFile,
249    cursor: &mut tree_sitter::TreeCursor,
250) {
251    // PHP includes: include "file.php", require "file.php", include_once, require_once
252    if node.kind() == "include_expression" || node.kind() == "require_expression" {
253        for j in 0..node.child_count() {
254            let Some(child) = node.child(j) else { continue };
255            if child.kind() == "string" {
256                let include_path = unquote_str(&source_bytes[child.byte_range()]);
257                if !include_path.is_empty() {
258                    let resolved = resolve_include_path(&file.relative_path, &include_path);
259                    if !resolved.is_empty() {
260                        edges.push(EdgeDef {
261                            src: file_id.to_string(),
262                            dst: file_node_id(&resolved),
263                            kind: EdgeKind::Imports,
264                            ..Default::default()
265                        });
266                    }
267                }
268            }
269        }
270    }
271
272    if cursor.goto_first_child() {
273        loop {
274            let child = cursor.node();
275            traverse_includes(edges, child, source_bytes, file_id, file, cursor);
276            if !cursor.goto_next_sibling() {
277                break;
278            }
279        }
280        cursor.goto_parent();
281    }
282}
283
/// Decodes `s` as UTF-8 and strips surrounding whitespace, then surrounding
/// single quotes, then surrounding double quotes. Bytes that are not valid
/// UTF-8 yield an empty string.
fn unquote_str(s: &[u8]) -> String {
    match std::str::from_utf8(s) {
        Ok(text) => {
            let without_space = text.trim();
            let without_single = without_space.trim_matches('\'');
            without_single.trim_matches('"').to_owned()
        }
        Err(_) => String::new(),
    }
}
288
/// Resolves `import` relative to the directory of `current` and returns the
/// normalised, `/`-separated path.
///
/// * `.` and empty segments (from `//` or a trailing `/`) are ignored, so the
///   result never contains doubled separators introduced by `import`.
/// * `..` removes the last directory; at the top level it is a no-op.
/// * An absolute `import` (leading `/`) cannot be expressed relative to
///   `current`, so an empty string is returned to signal "unresolvable".
fn resolve_include_path(current: &str, import: &str) -> String {
    if import.starts_with('/') {
        return String::new();
    }

    let mut parts: Vec<&str> = current.split('/').collect();
    parts.pop(); // drop the file name, keeping only the directory components

    for segment in import.split('/') {
        match segment {
            "" | "." => {}
            ".." => {
                parts.pop();
            }
            other => parts.push(other),
        }
    }

    parts.join("/")
}
305
306fn extract_calls(edges: &mut Vec<EdgeDef>, root: Node, source: &[u8], file: &SourceFile) {
307    let mut fn_stack: Vec<String> = Vec::new();
308    walk_for_calls(edges, root, source, file, &mut fn_stack);
309}
310
/// Returns `true` when `kind` names a node that opens a new function scope:
/// a function definition, a method declaration or an anonymous function.
fn is_fn_node(kind: &str) -> bool {
    const FN_KINDS: [&str; 3] = [
        "function_definition",
        "method_declaration",
        "anonymous_function_creation_expression",
    ];
    FN_KINDS.contains(&kind)
}
317
318fn fn_name_from_node(node: Node, source: &[u8], file: &SourceFile) -> Option<String> {
319    if let Some(name_node) = node.child_by_field_name("name") {
320        let name = name_node.utf8_text(source).unwrap_or("").to_string();
321        if !name.is_empty() {
322            return Some(format!("fn:{}:{}", file.relative_path, name));
323        }
324    }
325    None
326}
327
328fn walk_for_calls(
329    edges: &mut Vec<EdgeDef>,
330    node: Node,
331    source: &[u8],
332    file: &SourceFile,
333    fn_stack: &mut Vec<String>,
334) {
335    let kind = node.kind();
336    let pushed = is_fn_node(kind);
337
338    if pushed {
339        if let Some(id) = fn_name_from_node(node, source, file) {
340            fn_stack.push(id);
341        } else {
342            fn_stack.push(String::new());
343        }
344    }
345
346    if kind == "function_call_expression" {
347        if let Some(caller_id) = fn_stack.last().filter(|s| !s.is_empty()) {
348            let callee_name = node
349                .child_by_field_name("function")
350                .and_then(|func| match func.kind() {
351                    "name" => Some(func.utf8_text(source).unwrap_or("").to_string()),
352                    "qualified_name" => {
353                        // Namespace\Class::method or Class::method
354                        Some(func.utf8_text(source).unwrap_or("").to_string())
355                    }
356                    "member_access_expression" => func
357                        .child_by_field_name("name")
358                        .map(|p| p.utf8_text(source).unwrap_or("").to_string()),
359                    _ => None,
360                })
361                .unwrap_or_default();
362
363            if !callee_name.is_empty() {
364                edges.push(EdgeDef {
365                    src: caller_id.clone(),
366                    dst: callee_name,
367                    kind: EdgeKind::Calls,
368                    confidence: 0.7,
369                    ..Default::default()
370                });
371            }
372        }
373    }
374
375    let mut cursor = node.walk();
376    if cursor.goto_first_child() {
377        loop {
378            walk_for_calls(edges, cursor.node(), source, file, fn_stack);
379            if !cursor.goto_next_sibling() {
380                break;
381            }
382        }
383    }
384
385    if pushed {
386        fn_stack.pop();
387    }
388}