1use tree_sitter::{Node, Parser, Query, QueryCursor};
2
3use crate::parser::{EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult};
4use crate::walker::SourceFile;
5
6pub struct PhpParser {
7 language: tree_sitter::Language,
8}
9
10impl PhpParser {
11 pub fn new() -> Self {
12 Self {
13 language: tree_sitter_php::language_php(),
14 }
15 }
16}
17
18impl Default for PhpParser {
19 fn default() -> Self {
20 Self::new()
21 }
22}
23
24impl LanguageParser for PhpParser {
25 fn extensions(&self) -> &[&str] {
26 &["php"]
27 }
28
29 fn extract(&self, file: &SourceFile) -> anyhow::Result<ParseResult> {
30 let mut parser = Parser::new();
31 parser.set_language(&self.language)?;
32
33 let tree = parser
34 .parse(&file.content, None)
35 .ok_or_else(|| anyhow::anyhow!("failed to parse {}", file.relative_path))?;
36
37 let source_bytes = file.content.as_bytes();
38 let root = tree.root_node();
39 let mut nodes = Vec::new();
40 let mut edges = Vec::new();
41
42 let fp = file_node_id(&file.relative_path);
43
44 if let Ok(query) = Query::new(
46 &self.language,
47 "(function_definition name: (name) @name) @fn",
48 ) {
49 extract_nodes(
50 &mut nodes,
51 &mut edges,
52 file,
53 &query,
54 root,
55 source_bytes,
56 NodeKind::Function,
57 "fn",
58 &fp,
59 );
60 }
61
62 if let Ok(query) = Query::new(
64 &self.language,
65 "(class_declaration name: (name) @name) @cls",
66 ) {
67 extract_nodes(
68 &mut nodes,
69 &mut edges,
70 file,
71 &query,
72 root,
73 source_bytes,
74 NodeKind::Class,
75 "cls",
76 &fp,
77 );
78 }
79
80 if let Ok(query) = Query::new(
82 &self.language,
83 "(interface_declaration name: (name) @name) @cls",
84 ) {
85 extract_nodes(
86 &mut nodes,
87 &mut edges,
88 file,
89 &query,
90 root,
91 source_bytes,
92 NodeKind::Class,
93 "cls",
94 &fp,
95 );
96 }
97
98 if let Ok(query) = Query::new(
100 &self.language,
101 "(method_declaration name: (name) @name) @fn",
102 ) {
103 extract_nodes(
104 &mut nodes,
105 &mut edges,
106 file,
107 &query,
108 root,
109 source_bytes,
110 NodeKind::Function,
111 "fn",
112 &fp,
113 );
114 }
115
116 extract_includes(&mut edges, root, source_bytes, &fp, file);
118
119 extract_calls(&mut edges, root, source_bytes, file);
121
122 Ok(ParseResult {
123 nodes,
124 edges,
125 ..Default::default()
126 })
127 }
128}
129
/// Builds the graph id of a file node: the literal `file:` followed by the
/// path exactly as given.
fn file_node_id(rel_path: &str) -> String {
    ["file:", rel_path].concat()
}
133
134#[allow(clippy::too_many_arguments)]
135fn extract_nodes(
136 nodes: &mut Vec<NodeDef>,
137 edges: &mut Vec<EdgeDef>,
138 file: &SourceFile,
139 query: &Query,
140 root: tree_sitter::Node,
141 source_bytes: &[u8],
142 kind: NodeKind,
143 prefix: &str,
144 file_id: &str,
145) {
146 let mut cursor = QueryCursor::new();
147 for m in cursor.matches(query, root, source_bytes) {
148 let Some(name_capture) = m
149 .captures
150 .iter()
151 .find(|c| query.capture_names()[c.index as usize] == "name")
152 else {
153 continue;
154 };
155
156 let name = node_text(name_capture.node, source_bytes);
157 let node_start = name_capture.node.start_position();
158
159 let body_end = m
160 .captures
161 .iter()
162 .find(|c| {
163 let cap_name = &query.capture_names()[c.index as usize];
164 *cap_name == "fn" || *cap_name == "cls"
165 })
166 .map(|c| c.node.end_position())
167 .unwrap_or_else(|| name_capture.node.end_position());
168
169 let id = format!("{}:{}:{}", prefix, file.relative_path, name);
170
171 nodes.push(NodeDef {
172 id: id.clone(),
173 kind: kind.clone(),
174 name: name.clone(),
175 path: file.relative_path.clone(),
176 line_start: node_start.row as u32 + 1,
177 line_end: body_end.row as u32 + 1,
178 ..Default::default()
179 });
180
181 edges.push(EdgeDef {
182 src: file_id.to_string(),
183 dst: id,
184 kind: EdgeKind::Exports,
185 ..Default::default()
186 });
187 }
188}
189
190fn node_text(node: tree_sitter::Node, source: &[u8]) -> String {
191 node.utf8_text(source).unwrap_or("").to_string()
192}
193
194fn extract_includes(
195 edges: &mut Vec<EdgeDef>,
196 root: tree_sitter::Node,
197 source_bytes: &[u8],
198 file_id: &str,
199 file: &SourceFile,
200) {
201 let mut cursor = root.walk();
202 traverse_includes(edges, root, source_bytes, file_id, file, &mut cursor);
203}
204
205fn traverse_includes(
206 edges: &mut Vec<EdgeDef>,
207 node: tree_sitter::Node,
208 source_bytes: &[u8],
209 file_id: &str,
210 file: &SourceFile,
211 cursor: &mut tree_sitter::TreeCursor,
212) {
213 if node.kind() == "include_expression" || node.kind() == "require_expression" {
215 for j in 0..node.child_count() {
216 let Some(child) = node.child(j) else { continue };
217 if child.kind() == "string" {
218 let include_path = unquote_str(&source_bytes[child.byte_range()]);
219 if !include_path.is_empty() {
220 let resolved = resolve_include_path(&file.relative_path, &include_path);
221 if !resolved.is_empty() {
222 edges.push(EdgeDef {
223 src: file_id.to_string(),
224 dst: file_node_id(&resolved),
225 kind: EdgeKind::Imports,
226 ..Default::default()
227 });
228 }
229 }
230 }
231 }
232 }
233
234 if cursor.goto_first_child() {
235 loop {
236 let child = cursor.node();
237 traverse_includes(edges, child, source_bytes, file_id, file, cursor);
238 if !cursor.goto_next_sibling() {
239 break;
240 }
241 }
242 cursor.goto_parent();
243 }
244}
245
/// Decodes `s` as UTF-8 and strips surrounding whitespace, then any run of
/// single quotes, then any run of double quotes, from both ends.
///
/// Bytes that are not valid UTF-8 yield an empty string.
fn unquote_str(s: &[u8]) -> String {
    let text = match std::str::from_utf8(s) {
        Ok(decoded) => decoded,
        Err(_) => "",
    };
    let without_single = text.trim().trim_matches(|c: char| c == '\'');
    let without_double = without_single.trim_matches(|c: char| c == '"');
    without_double.to_owned()
}
250
/// Resolves `import` against the directory that contains `current` and
/// returns the resulting `/`-separated path.
///
/// * `.` segments and empty segments (from `//` or a trailing `/`) are
///   ignored, so the result never contains an empty component contributed by
///   `import`.
/// * `..` removes the last component; once nothing is left it is a no-op.
/// * An `import` that starts with `/` is absolute and cannot be expressed
///   relative to `current`; an empty string is returned for it, as for any
///   import that resolves to nothing.
fn resolve_include_path(current: &str, import: &str) -> String {
    if import.starts_with('/') {
        return String::new();
    }

    // Start from the directory of `current`: all components but the file name.
    let mut parts: Vec<&str> = current.split('/').collect();
    parts.pop();

    for segment in import.split('/') {
        match segment {
            "" | "." => {}
            ".." => {
                parts.pop();
            }
            other => parts.push(other),
        }
    }

    parts.join("/")
}
267
268fn extract_calls(edges: &mut Vec<EdgeDef>, root: Node, source: &[u8], file: &SourceFile) {
269 let mut fn_stack: Vec<String> = Vec::new();
270 walk_for_calls(edges, root, source, file, &mut fn_stack);
271}
272
/// Reports whether `kind` is one of the node kinds that open a new function
/// scope for call attribution.
fn is_fn_node(kind: &str) -> bool {
    const FUNCTION_LIKE_KINDS: [&str; 3] = [
        "function_definition",
        "method_declaration",
        "anonymous_function_creation_expression",
    ];
    FUNCTION_LIKE_KINDS.iter().any(|candidate| *candidate == kind)
}
279
280fn fn_name_from_node(node: Node, source: &[u8], file: &SourceFile) -> Option<String> {
281 if let Some(name_node) = node.child_by_field_name("name") {
282 let name = name_node.utf8_text(source).unwrap_or("").to_string();
283 if !name.is_empty() {
284 return Some(format!("fn:{}:{}", file.relative_path, name));
285 }
286 }
287 None
288}
289
290fn walk_for_calls(
291 edges: &mut Vec<EdgeDef>,
292 node: Node,
293 source: &[u8],
294 file: &SourceFile,
295 fn_stack: &mut Vec<String>,
296) {
297 let kind = node.kind();
298 let pushed = is_fn_node(kind);
299
300 if pushed {
301 if let Some(id) = fn_name_from_node(node, source, file) {
302 fn_stack.push(id);
303 } else {
304 fn_stack.push(String::new());
305 }
306 }
307
308 if kind == "function_call_expression" {
309 if let Some(caller_id) = fn_stack.last().filter(|s| !s.is_empty()) {
310 let callee_name = node
311 .child_by_field_name("function")
312 .and_then(|func| match func.kind() {
313 "name" => Some(func.utf8_text(source).unwrap_or("").to_string()),
314 "qualified_name" => {
315 Some(func.utf8_text(source).unwrap_or("").to_string())
317 }
318 "member_access_expression" => func
319 .child_by_field_name("name")
320 .map(|p| p.utf8_text(source).unwrap_or("").to_string()),
321 _ => None,
322 })
323 .unwrap_or_default();
324
325 if !callee_name.is_empty() {
326 edges.push(EdgeDef {
327 src: caller_id.clone(),
328 dst: callee_name,
329 kind: EdgeKind::Calls,
330 confidence: 0.7,
331 ..Default::default()
332 });
333 }
334 }
335 }
336
337 let mut cursor = node.walk();
338 if cursor.goto_first_child() {
339 loop {
340 walk_for_calls(edges, cursor.node(), source, file, fn_stack);
341 if !cursor.goto_next_sibling() {
342 break;
343 }
344 }
345 }
346
347 if pushed {
348 fn_stack.pop();
349 }
350}