1use tree_sitter::{Parser, Query, QueryCursor, Node};
2
3use crate::parser::{EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult};
4use crate::walker::SourceFile;
5
6pub struct PhpParser {
7 language: tree_sitter::Language,
8}
9
10impl PhpParser {
11 pub fn new() -> Self {
12 Self {
13 language: tree_sitter_php::language_php(),
14 }
15 }
16}
17
18impl Default for PhpParser {
19 fn default() -> Self {
20 Self::new()
21 }
22}
23
24impl LanguageParser for PhpParser {
25 fn extensions(&self) -> &[&str] {
26 &["php"]
27 }
28
29 fn extract(&self, file: &SourceFile) -> anyhow::Result<ParseResult> {
30 let mut parser = Parser::new();
31 parser.set_language(&self.language)?;
32
33 let tree = parser.parse(&file.content, None).ok_or_else(|| {
34 anyhow::anyhow!("failed to parse {}", file.relative_path)
35 })?;
36
37 let source_bytes = file.content.as_bytes();
38 let root = tree.root_node();
39 let mut nodes = Vec::new();
40 let mut edges = Vec::new();
41
42 let fp = file_node_id(&file.relative_path);
43
44 if let Ok(query) = Query::new(
46 &self.language,
47 "(function_definition name: (name) @name) @fn",
48 ) {
49 extract_nodes(
50 &mut nodes, &mut edges, file, &query, root, source_bytes,
51 NodeKind::Function, "fn", &fp,
52 );
53 }
54
55 if let Ok(query) = Query::new(
57 &self.language,
58 "(class_declaration name: (name) @name) @cls",
59 ) {
60 extract_nodes(
61 &mut nodes, &mut edges, file, &query, root, source_bytes,
62 NodeKind::Class, "cls", &fp,
63 );
64 }
65
66 if let Ok(query) = Query::new(
68 &self.language,
69 "(interface_declaration name: (name) @name) @cls",
70 ) {
71 extract_nodes(
72 &mut nodes, &mut edges, file, &query, root, source_bytes,
73 NodeKind::Class, "cls", &fp,
74 );
75 }
76
77 if let Ok(query) = Query::new(
79 &self.language,
80 "(method_declaration name: (name) @name) @fn",
81 ) {
82 extract_nodes(
83 &mut nodes, &mut edges, file, &query, root, source_bytes,
84 NodeKind::Function, "fn", &fp,
85 );
86 }
87
88 extract_includes(&mut edges, root, source_bytes, &fp, file);
90
91 extract_calls(&mut edges, root, source_bytes, file);
93
94 Ok(ParseResult { nodes, edges })
95 }
96}
97
/// Builds the graph id of a file node: the relative path prefixed with `file:`.
fn file_node_id(rel_path: &str) -> String {
    let mut id = String::with_capacity("file:".len() + rel_path.len());
    id.push_str("file:");
    id.push_str(rel_path);
    id
}
101
102#[allow(clippy::too_many_arguments)]
103fn extract_nodes(
104 nodes: &mut Vec<NodeDef>,
105 edges: &mut Vec<EdgeDef>,
106 file: &SourceFile,
107 query: &Query,
108 root: tree_sitter::Node,
109 source_bytes: &[u8],
110 kind: NodeKind,
111 prefix: &str,
112 file_id: &str,
113) {
114 let mut cursor = QueryCursor::new();
115 for m in cursor.matches(query, root, source_bytes) {
116 let Some(name_capture) = m
117 .captures
118 .iter()
119 .find(|c| query.capture_names()[c.index as usize] == "name")
120 else {
121 continue;
122 };
123
124 let name = node_text(name_capture.node, source_bytes);
125 let node_start = name_capture.node.start_position();
126
127 let body_end = m
128 .captures
129 .iter()
130 .find(|c| {
131 let cap_name = &query.capture_names()[c.index as usize];
132 *cap_name == "fn" || *cap_name == "cls"
133 })
134 .map(|c| c.node.end_position())
135 .unwrap_or_else(|| name_capture.node.end_position());
136
137 let id = format!("{}:{}:{}", prefix, file.relative_path, name);
138
139 nodes.push(NodeDef {
140 id: id.clone(),
141 kind: kind.clone(),
142 name: name.clone(),
143 path: file.relative_path.clone(),
144 line_start: node_start.row as u32 + 1,
145 line_end: body_end.row as u32 + 1,
146 ..Default::default()
147 });
148
149 edges.push(EdgeDef {
150 src: file_id.to_string(),
151 dst: id,
152 kind: EdgeKind::Exports,
153 ..Default::default()
154 });
155 }
156}
157
158fn node_text(node: tree_sitter::Node, source: &[u8]) -> String {
159 node.utf8_text(source).unwrap_or("").to_string()
160}
161
162fn extract_includes(
163 edges: &mut Vec<EdgeDef>,
164 root: tree_sitter::Node,
165 source_bytes: &[u8],
166 file_id: &str,
167 file: &SourceFile,
168) {
169 let mut cursor = root.walk();
170 traverse_includes(edges, root, source_bytes, file_id, file, &mut cursor);
171}
172
173fn traverse_includes(
174 edges: &mut Vec<EdgeDef>,
175 node: tree_sitter::Node,
176 source_bytes: &[u8],
177 file_id: &str,
178 file: &SourceFile,
179 cursor: &mut tree_sitter::TreeCursor,
180) {
181 if node.kind() == "include_expression" || node.kind() == "require_expression" {
183 for j in 0..node.child_count() {
184 let Some(child) = node.child(j) else { continue };
185 if child.kind() == "string" {
186 let include_path = unquote_str(&source_bytes[child.byte_range()]);
187 if !include_path.is_empty() {
188 let resolved = resolve_include_path(&file.relative_path, &include_path);
189 if !resolved.is_empty() {
190 edges.push(EdgeDef {
191 src: file_id.to_string(),
192 dst: file_node_id(&resolved),
193 kind: EdgeKind::Imports,
194 ..Default::default()
195 });
196 }
197 }
198 }
199 }
200 }
201
202 if cursor.goto_first_child() {
203 loop {
204 let child = cursor.node();
205 traverse_includes(edges, child, source_bytes, file_id, file, cursor);
206 if !cursor.goto_next_sibling() {
207 break;
208 }
209 }
210 cursor.goto_parent();
211 }
212}
213
/// Decodes `s` as UTF-8, trims surrounding whitespace and removes exactly one
/// pair of matching quotes (`'...'` or `"..."`) if present.
///
/// Only the outermost pair is removed, so quote characters that belong to the
/// content (e.g. `'a"'`) are preserved. Invalid UTF-8 yields an empty string;
/// text that is not wrapped in a matching pair is returned trimmed but
/// otherwise unchanged.
fn unquote_str(s: &[u8]) -> String {
    let text = std::str::from_utf8(s).unwrap_or("").trim();

    ['\'', '"']
        .iter()
        .find_map(|&quote| text.strip_prefix(quote)?.strip_suffix(quote))
        .unwrap_or(text)
        .to_string()
}
218
/// Resolves `import` relative to the directory containing `current` and
/// returns the normalised `/`-separated path.
///
/// * `.` segments and empty segments (from `//` or a trailing `/`) are ignored.
/// * `..` removes the last accumulated segment; at the top level it is a no-op.
/// * An absolute `import` (leading `/`) cannot be expressed relative to
///   `current`, so an empty string is returned to signal "unresolved".
fn resolve_include_path(current: &str, import: &str) -> String {
    if import.starts_with('/') {
        return String::new();
    }

    // Start from the directory of the including file (drop the file name).
    let mut parts: Vec<&str> = current.split('/').collect();
    parts.pop();

    for segment in import.split('/') {
        match segment {
            "" | "." => {}
            ".." => {
                parts.pop();
            }
            other => parts.push(other),
        }
    }

    parts.join("/")
}
235
236fn extract_calls(edges: &mut Vec<EdgeDef>, root: Node, source: &[u8], file: &SourceFile) {
237 let mut fn_stack: Vec<String> = Vec::new();
238 walk_for_calls(edges, root, source, file, &mut fn_stack);
239}
240
/// Reports whether `kind` is one of the node kinds treated as a function
/// scope while attributing calls.
fn is_fn_node(kind: &str) -> bool {
    const FN_KINDS: [&str; 3] = [
        "function_definition",
        "method_declaration",
        "anonymous_function_creation_expression",
    ];
    FN_KINDS.contains(&kind)
}
244
245fn fn_name_from_node(node: Node, source: &[u8], file: &SourceFile) -> Option<String> {
246 if let Some(name_node) = node.child_by_field_name("name") {
247 let name = name_node.utf8_text(source).unwrap_or("").to_string();
248 if !name.is_empty() {
249 return Some(format!("fn:{}:{}", file.relative_path, name));
250 }
251 }
252 None
253}
254
255fn walk_for_calls(
256 edges: &mut Vec<EdgeDef>,
257 node: Node,
258 source: &[u8],
259 file: &SourceFile,
260 fn_stack: &mut Vec<String>,
261) {
262 let kind = node.kind();
263 let pushed = is_fn_node(kind);
264
265 if pushed {
266 if let Some(id) = fn_name_from_node(node, source, file) {
267 fn_stack.push(id);
268 } else {
269 fn_stack.push(String::new());
270 }
271 }
272
273 if kind == "function_call_expression" {
274 if let Some(caller_id) = fn_stack.last().filter(|s| !s.is_empty()) {
275 let callee_name = node
276 .child_by_field_name("function")
277 .and_then(|func| match func.kind() {
278 "name" => Some(func.utf8_text(source).unwrap_or("").to_string()),
279 "qualified_name" => {
280 Some(func.utf8_text(source).unwrap_or("").to_string())
282 }
283 "member_access_expression" => func
284 .child_by_field_name("name")
285 .map(|p| p.utf8_text(source).unwrap_or("").to_string()),
286 _ => None,
287 })
288 .unwrap_or_default();
289
290 if !callee_name.is_empty() {
291 edges.push(EdgeDef {
292 src: caller_id.clone(),
293 dst: callee_name,
294 kind: EdgeKind::Calls,
295 confidence: 0.7,
296 ..Default::default()
297 });
298 }
299 }
300 }
301
302 let mut cursor = node.walk();
303 if cursor.goto_first_child() {
304 loop {
305 walk_for_calls(edges, cursor.node(), source, file, fn_stack);
306 if !cursor.goto_next_sibling() {
307 break;
308 }
309 }
310 }
311
312 if pushed {
313 fn_stack.pop();
314 }
315}