1use tree_sitter::{Node, Parser, Query, QueryCursor};
2
3use crate::parser::{
4 collect_doc_block_above, meta_set, EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind,
5 ParseResult,
6};
7use crate::walker::SourceFile;
8
/// PHP source extractor backed by tree-sitter.
///
/// Holds the tree-sitter grammar handle so it is created once in
/// [`PhpParser::new`] and reused for every parsed file and compiled query.
pub struct PhpParser {
    /// tree-sitter grammar used to configure parsers and compile queries.
    language: tree_sitter::Language,
}
12
13impl PhpParser {
14 pub fn new() -> Self {
15 Self {
16 language: tree_sitter_php::language_php(),
17 }
18 }
19}
20
21impl Default for PhpParser {
22 fn default() -> Self {
23 Self::new()
24 }
25}
26
27impl LanguageParser for PhpParser {
28 fn extensions(&self) -> &[&str] {
29 &["php"]
30 }
31
32 fn extract(&self, file: &SourceFile) -> anyhow::Result<ParseResult> {
33 let mut parser = Parser::new();
34 parser.set_language(&self.language)?;
35
36 let tree = parser
37 .parse(&file.content, None)
38 .ok_or_else(|| anyhow::anyhow!("failed to parse {}", file.relative_path))?;
39
40 let source_bytes = file.content.as_bytes();
41 let root = tree.root_node();
42 let mut nodes = Vec::new();
43 let mut edges = Vec::new();
44
45 let fp = file_node_id(&file.relative_path);
46
47 if let Ok(query) = Query::new(
49 &self.language,
50 "(function_definition name: (name) @name) @fn",
51 ) {
52 extract_nodes(
53 &mut nodes,
54 &mut edges,
55 file,
56 &query,
57 root,
58 source_bytes,
59 NodeKind::Function,
60 "fn",
61 &fp,
62 );
63 }
64
65 if let Ok(query) = Query::new(
67 &self.language,
68 "(class_declaration name: (name) @name) @cls",
69 ) {
70 extract_nodes(
71 &mut nodes,
72 &mut edges,
73 file,
74 &query,
75 root,
76 source_bytes,
77 NodeKind::Class,
78 "cls",
79 &fp,
80 );
81 }
82
83 if let Ok(query) = Query::new(
85 &self.language,
86 "(interface_declaration name: (name) @name) @cls",
87 ) {
88 extract_nodes(
89 &mut nodes,
90 &mut edges,
91 file,
92 &query,
93 root,
94 source_bytes,
95 NodeKind::Class,
96 "cls",
97 &fp,
98 );
99 }
100
101 if let Ok(query) = Query::new(
103 &self.language,
104 "(method_declaration name: (name) @name) @fn",
105 ) {
106 extract_nodes(
107 &mut nodes,
108 &mut edges,
109 file,
110 &query,
111 root,
112 source_bytes,
113 NodeKind::Function,
114 "fn",
115 &fp,
116 );
117 }
118
119 extract_includes(&mut edges, root, source_bytes, &fp, file);
121
122 extract_calls(&mut edges, root, source_bytes, file);
124
125 Ok(ParseResult {
126 nodes,
127 edges,
128 ..Default::default()
129 })
130 }
131}
132
/// Builds the graph id of a file node: the literal `file:` followed by the
/// path exactly as given.
fn file_node_id(rel_path: &str) -> String {
    const PREFIX: &str = "file:";
    let mut id = String::with_capacity(PREFIX.len() + rel_path.len());
    id.push_str(PREFIX);
    id.push_str(rel_path);
    id
}
136
137#[allow(clippy::too_many_arguments)]
138fn extract_nodes(
139 nodes: &mut Vec<NodeDef>,
140 edges: &mut Vec<EdgeDef>,
141 file: &SourceFile,
142 query: &Query,
143 root: tree_sitter::Node,
144 source_bytes: &[u8],
145 kind: NodeKind,
146 prefix: &str,
147 file_id: &str,
148) {
149 let mut cursor = QueryCursor::new();
150 for m in cursor.matches(query, root, source_bytes) {
151 let Some(name_capture) = m
152 .captures
153 .iter()
154 .find(|c| query.capture_names()[c.index as usize] == "name")
155 else {
156 continue;
157 };
158
159 let name = node_text(name_capture.node, source_bytes);
160 let node_start = name_capture.node.start_position();
161
162 let item_node = m
163 .captures
164 .iter()
165 .find(|c| {
166 let cap_name = &query.capture_names()[c.index as usize];
167 *cap_name == "fn" || *cap_name == "cls"
168 })
169 .map(|c| c.node);
170 let body_end = item_node
171 .map(|n| n.end_position())
172 .unwrap_or_else(|| name_capture.node.end_position());
173
174 let id = format!("{}:{}:{}", prefix, file.relative_path, name);
175
176 let doc_comment = item_node
177 .and_then(|n| collect_doc_block_above(n, source_bytes, is_phpdoc_comment))
178 .map(strip_phpdoc_markers);
179
180 let mut def = NodeDef {
181 id: id.clone(),
182 kind: kind.clone(),
183 name: name.clone(),
184 path: file.relative_path.clone(),
185 line_start: node_start.row as u32 + 1,
186 line_end: body_end.row as u32 + 1,
187 ..Default::default()
188 };
189 if let Some(doc) = doc_comment {
190 meta_set(&mut def, "doc_comment", serde_json::Value::String(doc));
191 }
192 nodes.push(def);
193
194 edges.push(EdgeDef {
195 src: file_id.to_string(),
196 dst: id,
197 kind: EdgeKind::Exports,
198 ..Default::default()
199 });
200 }
201}
202
/// Returns `true` when `text`, ignoring leading whitespace, opens with the
/// `/**` marker.
fn is_phpdoc_comment(text: &str) -> bool {
    let body = text.trim_start();
    body.strip_prefix("/**").is_some()
}
206
/// Removes PHPDoc comment decoration from `raw` and returns the bare text.
///
/// Every line is trimmed, then stripped of its leading marker (`/**` on the
/// opening line, a single `*` on continuation lines) and of a trailing `*/`
/// terminator. A line that starts with `*/` contributes an empty line. The
/// joined result is trimmed, so leading and trailing blank lines disappear
/// while blank lines between paragraphs are kept.
fn strip_phpdoc_markers(raw: String) -> String {
    let mut out: Vec<&str> = Vec::new();
    for line in raw.lines() {
        let l = line.trim();
        let body = if l.starts_with("/**") {
            l.trim_start_matches("/**")
        } else if l.starts_with("*/") {
            ""
        } else {
            l.strip_prefix('*').unwrap_or(l)
        };
        // The terminator may share a line with text (` * Summary. */`) or
        // with an extra star (`**/`); strip it wherever it ends the line so
        // it never leaks into the stored documentation.
        out.push(body.trim_end_matches("*/").trim());
    }
    out.join("\n").trim().to_string()
}
227
228fn node_text(node: tree_sitter::Node, source: &[u8]) -> String {
229 node.utf8_text(source).unwrap_or("").to_string()
230}
231
232fn extract_includes(
233 edges: &mut Vec<EdgeDef>,
234 root: tree_sitter::Node,
235 source_bytes: &[u8],
236 file_id: &str,
237 file: &SourceFile,
238) {
239 let mut cursor = root.walk();
240 traverse_includes(edges, root, source_bytes, file_id, file, &mut cursor);
241}
242
243fn traverse_includes(
244 edges: &mut Vec<EdgeDef>,
245 node: tree_sitter::Node,
246 source_bytes: &[u8],
247 file_id: &str,
248 file: &SourceFile,
249 cursor: &mut tree_sitter::TreeCursor,
250) {
251 if node.kind() == "include_expression" || node.kind() == "require_expression" {
253 for j in 0..node.child_count() {
254 let Some(child) = node.child(j) else { continue };
255 if child.kind() == "string" {
256 let include_path = unquote_str(&source_bytes[child.byte_range()]);
257 if !include_path.is_empty() {
258 let resolved = resolve_include_path(&file.relative_path, &include_path);
259 if !resolved.is_empty() {
260 edges.push(EdgeDef {
261 src: file_id.to_string(),
262 dst: file_node_id(&resolved),
263 kind: EdgeKind::Imports,
264 ..Default::default()
265 });
266 }
267 }
268 }
269 }
270 }
271
272 if cursor.goto_first_child() {
273 loop {
274 let child = cursor.node();
275 traverse_includes(edges, child, source_bytes, file_id, file, cursor);
276 if !cursor.goto_next_sibling() {
277 break;
278 }
279 }
280 cursor.goto_parent();
281 }
282}
283
/// Decodes `s` as UTF-8 and strips surrounding whitespace, then any run of
/// single quotes, then any run of double quotes, from both ends.
///
/// Bytes that are not valid UTF-8 yield an empty string.
fn unquote_str(s: &[u8]) -> String {
    let text = match std::str::from_utf8(s) {
        Ok(decoded) => decoded,
        Err(_) => "",
    };
    let without_single = text.trim().trim_matches('\'');
    let without_double = without_single.trim_matches('"');
    without_double.to_owned()
}
288
/// Resolves `import` relative to the directory of `current` and returns the
/// normalised `/`-separated path.
///
/// `.` and empty segments (from `//` or a trailing `/`) are dropped and `..`
/// removes the previous segment. An empty string is returned when the target
/// cannot be expressed relative to the project: an absolute `import`
/// (leading `/`), or a `..` that would climb above the first segment of
/// `current`.
fn resolve_include_path(current: &str, import: &str) -> String {
    // An absolute filesystem path has no meaningful project-relative form.
    if import.starts_with('/') {
        return String::new();
    }

    let mut parts: Vec<&str> = current.split('/').collect();
    // Drop the file name so `parts` is the containing directory.
    parts.pop();

    for segment in import.split('/') {
        match segment {
            "" | "." => {}
            ".." => {
                // Climbing past the root would otherwise be clamped silently
                // and could point at an unrelated file.
                if parts.pop().is_none() {
                    return String::new();
                }
            }
            _ => parts.push(segment),
        }
    }

    parts.join("/")
}
305
306fn extract_calls(edges: &mut Vec<EdgeDef>, root: Node, source: &[u8], file: &SourceFile) {
307 let mut fn_stack: Vec<String> = Vec::new();
308 walk_for_calls(edges, root, source, file, &mut fn_stack);
309}
310
/// Returns `true` for the node kinds that `walk_for_calls` treats as opening
/// a new function scope.
fn is_fn_node(kind: &str) -> bool {
    const FN_KINDS: [&str; 3] = [
        "function_definition",
        "method_declaration",
        "anonymous_function_creation_expression",
    ];
    FN_KINDS.iter().any(|candidate| *candidate == kind)
}
317
318fn fn_name_from_node(node: Node, source: &[u8], file: &SourceFile) -> Option<String> {
319 if let Some(name_node) = node.child_by_field_name("name") {
320 let name = name_node.utf8_text(source).unwrap_or("").to_string();
321 if !name.is_empty() {
322 return Some(format!("fn:{}:{}", file.relative_path, name));
323 }
324 }
325 None
326}
327
328fn walk_for_calls(
329 edges: &mut Vec<EdgeDef>,
330 node: Node,
331 source: &[u8],
332 file: &SourceFile,
333 fn_stack: &mut Vec<String>,
334) {
335 let kind = node.kind();
336 let pushed = is_fn_node(kind);
337
338 if pushed {
339 if let Some(id) = fn_name_from_node(node, source, file) {
340 fn_stack.push(id);
341 } else {
342 fn_stack.push(String::new());
343 }
344 }
345
346 if kind == "function_call_expression" {
347 if let Some(caller_id) = fn_stack.last().filter(|s| !s.is_empty()) {
348 let callee_name = node
349 .child_by_field_name("function")
350 .and_then(|func| match func.kind() {
351 "name" => Some(func.utf8_text(source).unwrap_or("").to_string()),
352 "qualified_name" => {
353 Some(func.utf8_text(source).unwrap_or("").to_string())
355 }
356 "member_access_expression" => func
357 .child_by_field_name("name")
358 .map(|p| p.utf8_text(source).unwrap_or("").to_string()),
359 _ => None,
360 })
361 .unwrap_or_default();
362
363 if !callee_name.is_empty() {
364 edges.push(EdgeDef {
365 src: caller_id.clone(),
366 dst: callee_name,
367 kind: EdgeKind::Calls,
368 confidence: 0.7,
369 ..Default::default()
370 });
371 }
372 }
373 }
374
375 let mut cursor = node.walk();
376 if cursor.goto_first_child() {
377 loop {
378 walk_for_calls(edges, cursor.node(), source, file, fn_stack);
379 if !cursor.goto_next_sibling() {
380 break;
381 }
382 }
383 }
384
385 if pushed {
386 fn_stack.pop();
387 }
388}