1use tree_sitter::{Node, Parser, Query, QueryCursor};
2
3use crate::parser::{EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult};
4use crate::walker::SourceFile;
5
6pub struct PhpParser {
7 language: tree_sitter::Language,
8}
9
10impl PhpParser {
11 pub fn new() -> Self {
12 Self {
13 language: tree_sitter_php::language_php(),
14 }
15 }
16}
17
18impl Default for PhpParser {
19 fn default() -> Self {
20 Self::new()
21 }
22}
23
24impl LanguageParser for PhpParser {
25 fn extensions(&self) -> &[&str] {
26 &["php"]
27 }
28
29 fn extract(&self, file: &SourceFile) -> anyhow::Result<ParseResult> {
30 let mut parser = Parser::new();
31 parser.set_language(&self.language)?;
32
33 let tree = parser
34 .parse(&file.content, None)
35 .ok_or_else(|| anyhow::anyhow!("failed to parse {}", file.relative_path))?;
36
37 let source_bytes = file.content.as_bytes();
38 let root = tree.root_node();
39 let mut nodes = Vec::new();
40 let mut edges = Vec::new();
41
42 let fp = file_node_id(&file.relative_path);
43
44 if let Ok(query) = Query::new(
46 &self.language,
47 "(function_definition name: (name) @name) @fn",
48 ) {
49 extract_nodes(
50 &mut nodes,
51 &mut edges,
52 file,
53 &query,
54 root,
55 source_bytes,
56 NodeKind::Function,
57 "fn",
58 &fp,
59 );
60 }
61
62 if let Ok(query) = Query::new(
64 &self.language,
65 "(class_declaration name: (name) @name) @cls",
66 ) {
67 extract_nodes(
68 &mut nodes,
69 &mut edges,
70 file,
71 &query,
72 root,
73 source_bytes,
74 NodeKind::Class,
75 "cls",
76 &fp,
77 );
78 }
79
80 if let Ok(query) = Query::new(
82 &self.language,
83 "(interface_declaration name: (name) @name) @cls",
84 ) {
85 extract_nodes(
86 &mut nodes,
87 &mut edges,
88 file,
89 &query,
90 root,
91 source_bytes,
92 NodeKind::Class,
93 "cls",
94 &fp,
95 );
96 }
97
98 if let Ok(query) = Query::new(
100 &self.language,
101 "(method_declaration name: (name) @name) @fn",
102 ) {
103 extract_nodes(
104 &mut nodes,
105 &mut edges,
106 file,
107 &query,
108 root,
109 source_bytes,
110 NodeKind::Function,
111 "fn",
112 &fp,
113 );
114 }
115
116 extract_includes(&mut edges, root, source_bytes, &fp, file);
118
119 extract_calls(&mut edges, root, source_bytes, file);
121
122 Ok(ParseResult { nodes, edges })
123 }
124}
125
/// Builds the graph id of a file node: the literal prefix `file:` followed by
/// `rel_path`.
fn file_node_id(rel_path: &str) -> String {
    ["file:", rel_path].concat()
}
129
130#[allow(clippy::too_many_arguments)]
131fn extract_nodes(
132 nodes: &mut Vec<NodeDef>,
133 edges: &mut Vec<EdgeDef>,
134 file: &SourceFile,
135 query: &Query,
136 root: tree_sitter::Node,
137 source_bytes: &[u8],
138 kind: NodeKind,
139 prefix: &str,
140 file_id: &str,
141) {
142 let mut cursor = QueryCursor::new();
143 for m in cursor.matches(query, root, source_bytes) {
144 let Some(name_capture) = m
145 .captures
146 .iter()
147 .find(|c| query.capture_names()[c.index as usize] == "name")
148 else {
149 continue;
150 };
151
152 let name = node_text(name_capture.node, source_bytes);
153 let node_start = name_capture.node.start_position();
154
155 let body_end = m
156 .captures
157 .iter()
158 .find(|c| {
159 let cap_name = &query.capture_names()[c.index as usize];
160 *cap_name == "fn" || *cap_name == "cls"
161 })
162 .map(|c| c.node.end_position())
163 .unwrap_or_else(|| name_capture.node.end_position());
164
165 let id = format!("{}:{}:{}", prefix, file.relative_path, name);
166
167 nodes.push(NodeDef {
168 id: id.clone(),
169 kind: kind.clone(),
170 name: name.clone(),
171 path: file.relative_path.clone(),
172 line_start: node_start.row as u32 + 1,
173 line_end: body_end.row as u32 + 1,
174 ..Default::default()
175 });
176
177 edges.push(EdgeDef {
178 src: file_id.to_string(),
179 dst: id,
180 kind: EdgeKind::Exports,
181 ..Default::default()
182 });
183 }
184}
185
186fn node_text(node: tree_sitter::Node, source: &[u8]) -> String {
187 node.utf8_text(source).unwrap_or("").to_string()
188}
189
190fn extract_includes(
191 edges: &mut Vec<EdgeDef>,
192 root: tree_sitter::Node,
193 source_bytes: &[u8],
194 file_id: &str,
195 file: &SourceFile,
196) {
197 let mut cursor = root.walk();
198 traverse_includes(edges, root, source_bytes, file_id, file, &mut cursor);
199}
200
201fn traverse_includes(
202 edges: &mut Vec<EdgeDef>,
203 node: tree_sitter::Node,
204 source_bytes: &[u8],
205 file_id: &str,
206 file: &SourceFile,
207 cursor: &mut tree_sitter::TreeCursor,
208) {
209 if node.kind() == "include_expression" || node.kind() == "require_expression" {
211 for j in 0..node.child_count() {
212 let Some(child) = node.child(j) else { continue };
213 if child.kind() == "string" {
214 let include_path = unquote_str(&source_bytes[child.byte_range()]);
215 if !include_path.is_empty() {
216 let resolved = resolve_include_path(&file.relative_path, &include_path);
217 if !resolved.is_empty() {
218 edges.push(EdgeDef {
219 src: file_id.to_string(),
220 dst: file_node_id(&resolved),
221 kind: EdgeKind::Imports,
222 ..Default::default()
223 });
224 }
225 }
226 }
227 }
228 }
229
230 if cursor.goto_first_child() {
231 loop {
232 let child = cursor.node();
233 traverse_includes(edges, child, source_bytes, file_id, file, cursor);
234 if !cursor.goto_next_sibling() {
235 break;
236 }
237 }
238 cursor.goto_parent();
239 }
240}
241
/// Decodes `s` as UTF-8 and strips surrounding whitespace, then any run of
/// single quotes, then any run of double quotes from both ends.
///
/// Bytes that are not valid UTF-8 yield an empty string.
fn unquote_str(s: &[u8]) -> String {
    match std::str::from_utf8(s) {
        Ok(text) => {
            let without_single = text.trim().trim_matches('\'');
            without_single.trim_matches('"').to_owned()
        }
        Err(_) => String::new(),
    }
}
246
/// Resolves an include path relative to the directory of the including file.
///
/// `current` is the `/`-separated path of the including file; its last
/// component (the file name) is dropped before `import` is applied segment by
/// segment. `.` and empty segments (from `//` or a trailing `/`) are ignored,
/// and `..` removes the last directory. A `..` that would climb above the
/// first component is ignored.
///
/// Returns an empty string when `import` is absolute (starts with `/`),
/// because such a path cannot be expressed relative to `current`.
fn resolve_include_path(current: &str, import: &str) -> String {
    if import.starts_with('/') {
        return String::new();
    }

    let mut parts: Vec<&str> = current.split('/').collect();
    // Drop the file name so `parts` is the including file's directory.
    parts.pop();

    for segment in import.split('/') {
        match segment {
            // Empty segments would otherwise produce `dir//file` ids that
            // never match a real file node.
            "" | "." => {}
            ".." => {
                parts.pop();
            }
            other => parts.push(other),
        }
    }

    parts.join("/")
}
263
264fn extract_calls(edges: &mut Vec<EdgeDef>, root: Node, source: &[u8], file: &SourceFile) {
265 let mut fn_stack: Vec<String> = Vec::new();
266 walk_for_calls(edges, root, source, file, &mut fn_stack);
267}
268
/// Reports whether `kind` is one of the node kinds treated as opening a new
/// function scope during the call walk.
fn is_fn_node(kind: &str) -> bool {
    const FN_KINDS: [&str; 3] = [
        "function_definition",
        "method_declaration",
        "anonymous_function_creation_expression",
    ];
    FN_KINDS.contains(&kind)
}
275
276fn fn_name_from_node(node: Node, source: &[u8], file: &SourceFile) -> Option<String> {
277 if let Some(name_node) = node.child_by_field_name("name") {
278 let name = name_node.utf8_text(source).unwrap_or("").to_string();
279 if !name.is_empty() {
280 return Some(format!("fn:{}:{}", file.relative_path, name));
281 }
282 }
283 None
284}
285
286fn walk_for_calls(
287 edges: &mut Vec<EdgeDef>,
288 node: Node,
289 source: &[u8],
290 file: &SourceFile,
291 fn_stack: &mut Vec<String>,
292) {
293 let kind = node.kind();
294 let pushed = is_fn_node(kind);
295
296 if pushed {
297 if let Some(id) = fn_name_from_node(node, source, file) {
298 fn_stack.push(id);
299 } else {
300 fn_stack.push(String::new());
301 }
302 }
303
304 if kind == "function_call_expression" {
305 if let Some(caller_id) = fn_stack.last().filter(|s| !s.is_empty()) {
306 let callee_name = node
307 .child_by_field_name("function")
308 .and_then(|func| match func.kind() {
309 "name" => Some(func.utf8_text(source).unwrap_or("").to_string()),
310 "qualified_name" => {
311 Some(func.utf8_text(source).unwrap_or("").to_string())
313 }
314 "member_access_expression" => func
315 .child_by_field_name("name")
316 .map(|p| p.utf8_text(source).unwrap_or("").to_string()),
317 _ => None,
318 })
319 .unwrap_or_default();
320
321 if !callee_name.is_empty() {
322 edges.push(EdgeDef {
323 src: caller_id.clone(),
324 dst: callee_name,
325 kind: EdgeKind::Calls,
326 confidence: 0.7,
327 ..Default::default()
328 });
329 }
330 }
331 }
332
333 let mut cursor = node.walk();
334 if cursor.goto_first_child() {
335 loop {
336 walk_for_calls(edges, cursor.node(), source, file, fn_stack);
337 if !cursor.goto_next_sibling() {
338 break;
339 }
340 }
341 }
342
343 if pushed {
344 fn_stack.pop();
345 }
346}