1use similarity_core::language_parser::{
2 GenericFunctionDef, GenericTypeDef, Language, LanguageParser,
3};
4use similarity_core::tree::TreeNode;
5use std::error::Error;
6use std::rc::Rc;
7use tree_sitter::{Node, Parser};
8
/// PHP front-end that wraps a tree-sitter [`Parser`].
///
/// Instances are created with `PhpParser::new`, which configures the wrapped
/// parser with the tree-sitter PHP language.
pub struct PhpParser {
    /// The underlying tree-sitter parser used for every parse request.
    parser: Parser,
}
12
13impl PhpParser {
14 #[allow(dead_code)]
15 pub fn new() -> Result<Self, Box<dyn Error + Send + Sync>> {
16 let mut parser = Parser::new();
17 parser.set_language(&tree_sitter_php::LANGUAGE_PHP.into())?;
18
19 Ok(Self { parser })
20 }
21
22 #[allow(clippy::only_used_in_recursion)]
23 fn convert_node(&self, node: Node, source: &str, id_counter: &mut usize) -> TreeNode {
24 let current_id = *id_counter;
25 *id_counter += 1;
26
27 let label = node.kind().to_string();
28 let value = match node.kind() {
29 "name" | "variable_name" | "string" | "integer" | "float" | "true" | "false" | "null" => {
30 node.utf8_text(source.as_bytes()).unwrap_or("").to_string()
31 }
32 _ => "".to_string(),
33 };
34
35 let mut tree_node = TreeNode::new(label, value, current_id);
36
37 for child in node.children(&mut node.walk()) {
38 let child_node = self.convert_node(child, source, id_counter);
39 tree_node.add_child(Rc::new(child_node));
40 }
41
42 tree_node
43 }
44
45 fn extract_functions_from_node(
46 &self,
47 node: Node,
48 source: &str,
49 class_name: Option<&str>,
50 namespace: Option<&str>,
51 ) -> Vec<GenericFunctionDef> {
52 let mut functions = Vec::new();
53
54 let mut current_namespace: Option<String> = None;
56 let mut cursor = node.walk();
57 for child in node.children(&mut cursor) {
58 if child.kind() == "namespace_definition" {
59 if let Some(name_node) = child.child_by_field_name("name") {
60 if let Ok(ns_name) = name_node.utf8_text(source.as_bytes()) {
61 current_namespace = Some(ns_name.to_string());
62 break;
63 }
64 }
65 }
66 }
67
68 fn visit_node(
69 node: Node,
70 source: &str,
71 functions: &mut Vec<GenericFunctionDef>,
72 class_name: Option<&str>,
73 namespace: Option<&str>,
74 ) {
75 match node.kind() {
76 "function_definition" => {
77 if let Some(name_node) = node.child_by_field_name("name") {
78 if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
79 let parameters_node = node.child_by_field_name("parameters");
80 let body_node = node.child_by_field_name("body");
81
82 let params = extract_params(parameters_node, source);
83 let full_name = if let Some(ns) = namespace {
84 format!("{ns}\\{name}")
85 } else {
86 name.to_string()
87 };
88
89 functions.push(GenericFunctionDef {
90 name: full_name,
91 start_line: node.start_position().row as u32 + 1,
92 end_line: node.end_position().row as u32 + 1,
93 body_start_line: body_node
94 .map(|n| n.start_position().row as u32 + 1)
95 .unwrap_or(0),
96 body_end_line: body_node
97 .map(|n| n.end_position().row as u32 + 1)
98 .unwrap_or(0),
99 parameters: params,
100 is_method: class_name.is_some(),
101 class_name: class_name.map(|s| s.to_string()),
102 is_async: false, is_generator: is_generator_function(node, source),
104 decorators: Vec::new(), });
106 }
107 }
108 }
109 "method_declaration" => {
110 if let Some(name_node) = node.child_by_field_name("name") {
111 if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
112 let parameters_node = node.child_by_field_name("parameters");
113 let body_node = node.child_by_field_name("body");
114
115 let params = extract_params(parameters_node, source);
116 let visibility = extract_visibility(node, source);
117 let is_static = is_static_method(node, source);
118 let is_abstract = is_abstract_method(node, source);
119
120 let method_name = format!("{}::{}", class_name.unwrap_or(""), name);
121
122 functions.push(GenericFunctionDef {
123 name: method_name,
124 start_line: node.start_position().row as u32 + 1,
125 end_line: node.end_position().row as u32 + 1,
126 body_start_line: body_node
127 .map(|n| n.start_position().row as u32 + 1)
128 .unwrap_or(0),
129 body_end_line: body_node
130 .map(|n| n.end_position().row as u32 + 1)
131 .unwrap_or(0),
132 parameters: params,
133 is_method: true,
134 class_name: class_name.map(|s| s.to_string()),
135 is_async: false,
136 is_generator: is_generator_function(node, source),
137 decorators: vec![
138 visibility,
139 if is_static { "static".to_string() } else { "".to_string() },
140 if is_abstract { "abstract".to_string() } else { "".to_string() },
141 ].into_iter().filter(|s| !s.is_empty()).collect(),
142 });
143 }
144 }
145 }
146 "class_declaration" => {
147 if let Some(name_node) = node.child_by_field_name("name") {
148 if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
149 let mut subcursor = node.walk();
150 for child in node.children(&mut subcursor) {
151 visit_node(child, source, functions, Some(name), namespace);
152 }
153 }
154 }
155 }
156 "namespace_definition" => {
157 if let Some(name_node) = node.child_by_field_name("name") {
159 if let Ok(_ns_name) = name_node.utf8_text(source.as_bytes()) {
160 }
162 }
163 }
164 "namespace_use_declaration" => {
165 }
167 _ => {
168 let mut subcursor = node.walk();
169 for child in node.children(&mut subcursor) {
170 visit_node(child, source, functions, class_name, namespace);
171 }
172 }
173 }
174 }
175
176 fn is_generator_function(node: Node, source: &str) -> bool {
177 if let Some(body) = node.child_by_field_name("body") {
178 if let Ok(body_text) = body.utf8_text(source.as_bytes()) {
179 return body_text.contains("yield");
180 }
181 }
182 false
183 }
184
185 fn extract_visibility(node: Node, source: &str) -> String {
186 let mut cursor = node.walk();
187 for child in node.children(&mut cursor) {
188 if child.kind() == "visibility_modifier" {
189 if let Ok(visibility) = child.utf8_text(source.as_bytes()) {
190 return visibility.to_string();
191 }
192 }
193 }
194 "public".to_string() }
196
197 fn is_static_method(node: Node, _source: &str) -> bool {
198 let mut cursor = node.walk();
199 for child in node.children(&mut cursor) {
200 if child.kind() == "static_modifier" {
201 return true;
202 }
203 }
204 false
205 }
206
207 fn is_abstract_method(node: Node, _source: &str) -> bool {
208 let mut cursor = node.walk();
209 for child in node.children(&mut cursor) {
210 if child.kind() == "abstract_modifier" {
211 return true;
212 }
213 }
214 false
215 }
216
217 fn extract_params(params_node: Option<Node>, source: &str) -> Vec<String> {
218 if let Some(node) = params_node {
219 let mut params = Vec::new();
220 let mut cursor = node.walk();
221
222 for child in node.children(&mut cursor) {
223 match child.kind() {
224 "simple_parameter" => {
225 if let Some(var_node) = child.child_by_field_name("name") {
226 if let Ok(param_text) = var_node.utf8_text(source.as_bytes()) {
227 params.push(param_text.to_string());
228 }
229 }
230 }
231 "typed_parameter" => {
232 if let Some(var_node) = child.child_by_field_name("name") {
233 if let Ok(param_text) = var_node.utf8_text(source.as_bytes()) {
234 params.push(param_text.to_string());
235 }
236 }
237 }
238 "variadic_parameter" => {
239 if let Some(var_node) = child.child_by_field_name("name") {
240 if let Ok(param_text) = var_node.utf8_text(source.as_bytes()) {
241 params.push(format!("..{param_text}"));
242 }
243 }
244 }
245 _ => {}
246 }
247 }
248
249 params
250 } else {
251 Vec::new()
252 }
253 }
254
255 let final_namespace = current_namespace.as_deref().or(namespace);
256 visit_node(node, source, &mut functions, class_name, final_namespace);
257 functions
258 }
259}
260
261impl LanguageParser for PhpParser {
262 fn parse(&mut self, source: &str, _filename: &str) -> Result<Rc<TreeNode>, Box<dyn Error + Send + Sync>> {
263 let tree = self.parser.parse(source, None).ok_or_else(|| -> Box<dyn Error + Send + Sync> {
264 "Failed to parse PHP source".into()
265 })?;
266
267 let root_node = tree.root_node();
268 let mut id_counter = 0;
269 Ok(Rc::new(self.convert_node(root_node, source, &mut id_counter)))
270 }
271
272 fn extract_functions(
273 &mut self,
274 source: &str,
275 _filename: &str,
276 ) -> Result<Vec<GenericFunctionDef>, Box<dyn Error + Send + Sync>> {
277 let tree = self.parser.parse(source, None).ok_or_else(|| -> Box<dyn Error + Send + Sync> {
278 "Failed to parse PHP source".into()
279 })?;
280
281 let root_node = tree.root_node();
282 Ok(self.extract_functions_from_node(root_node, source, None, None))
283 }
284
285 fn extract_types(
286 &mut self,
287 source: &str,
288 _filename: &str,
289 ) -> Result<Vec<GenericTypeDef>, Box<dyn Error + Send + Sync>> {
290 let tree = self.parser.parse(source, None).ok_or_else(|| -> Box<dyn Error + Send + Sync> {
291 "Failed to parse PHP source".into()
292 })?;
293
294 let root_node = tree.root_node();
295 let mut types = Vec::new();
296
297 fn visit_node_for_types(node: Node, source: &str, types: &mut Vec<GenericTypeDef>) {
298 match node.kind() {
299 "class_declaration" => {
300 if let Some(name_node) = node.child_by_field_name("name") {
301 if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
302 types.push(GenericTypeDef {
303 name: name.to_string(),
304 kind: "class".to_string(),
305 start_line: node.start_position().row as u32 + 1,
306 end_line: node.end_position().row as u32 + 1,
307 fields: extract_class_properties(node, source),
308 });
309 }
310 }
311 }
312 "interface_declaration" => {
313 if let Some(name_node) = node.child_by_field_name("name") {
314 if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
315 types.push(GenericTypeDef {
316 name: name.to_string(),
317 kind: "interface".to_string(),
318 start_line: node.start_position().row as u32 + 1,
319 end_line: node.end_position().row as u32 + 1,
320 fields: extract_interface_methods(node, source),
321 });
322 }
323 }
324 }
325 "trait_declaration" => {
326 if let Some(name_node) = node.child_by_field_name("name") {
327 if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
328 types.push(GenericTypeDef {
329 name: name.to_string(),
330 kind: "trait".to_string(),
331 start_line: node.start_position().row as u32 + 1,
332 end_line: node.end_position().row as u32 + 1,
333 fields: extract_trait_methods(node, source),
334 });
335 }
336 }
337 }
338 _ => {}
339 }
340
341 let mut cursor = node.walk();
342 for child in node.children(&mut cursor) {
343 visit_node_for_types(child, source, types);
344 }
345 }
346
347 fn extract_class_properties(node: Node, source: &str) -> Vec<String> {
348 let mut properties = Vec::new();
349
350 if let Some(body) = node.child_by_field_name("body") {
351 let mut cursor = body.walk();
352 for child in body.children(&mut cursor) {
353 if child.kind() == "property_declaration" {
354 let mut prop_cursor = child.walk();
355 for prop_child in child.children(&mut prop_cursor) {
356 if prop_child.kind() == "variable_name" {
357 if let Ok(prop_name) = prop_child.utf8_text(source.as_bytes()) {
358 properties.push(prop_name.to_string());
359 }
360 }
361 }
362 }
363 }
364 }
365
366 properties
367 }
368
369 fn extract_interface_methods(node: Node, source: &str) -> Vec<String> {
370 let mut methods = Vec::new();
371
372 if let Some(body) = node.child_by_field_name("body") {
373 let mut cursor = body.walk();
374 for child in body.children(&mut cursor) {
375 if child.kind() == "method_declaration" {
376 if let Some(name_node) = child.child_by_field_name("name") {
377 if let Ok(method_name) = name_node.utf8_text(source.as_bytes()) {
378 methods.push(method_name.to_string());
379 }
380 }
381 }
382 }
383 }
384
385 methods
386 }
387
388 fn extract_trait_methods(node: Node, source: &str) -> Vec<String> {
389 let mut methods = Vec::new();
390
391 if let Some(body) = node.child_by_field_name("body") {
392 let mut cursor = body.walk();
393 for child in body.children(&mut cursor) {
394 if child.kind() == "method_declaration" {
395 if let Some(name_node) = child.child_by_field_name("name") {
396 if let Ok(method_name) = name_node.utf8_text(source.as_bytes()) {
397 methods.push(method_name.to_string());
398 }
399 }
400 }
401 }
402 }
403
404 methods
405 }
406
407 visit_node_for_types(root_node, source, &mut types);
408 Ok(types)
409 }
410
411 fn language(&self) -> Language {
412 Language::Php
413 }
414}
415
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs function extraction on `source` with a freshly built parser.
    fn functions_in(source: &str) -> Vec<GenericFunctionDef> {
        let mut parser = PhpParser::new().unwrap();
        parser.extract_functions(source, "test.php").unwrap()
    }

    /// Reports whether a definition with exactly this name was extracted.
    fn has_name(functions: &[GenericFunctionDef], expected: &str) -> bool {
        functions.iter().any(|f| f.name == expected)
    }

    /// Finds the first definition whose name satisfies `matches`.
    fn find_by<'a>(
        functions: &'a [GenericFunctionDef],
        matches: impl Fn(&str) -> bool,
    ) -> &'a GenericFunctionDef {
        functions.iter().find(|f| matches(&f.name)).unwrap()
    }

    #[test]
    fn test_php_functions() {
        let functions = functions_in(
            r#"
<?php
function hello($name) {
    return "Hello, " . $name . "!";
}

function add($a, $b = 0) {
    return $a + $b;
}

class Calculator {
    public function __construct() {
        $this->result = 0;
    }

    public function add($x) {
        $this->result += $x;
        return $this->result;
    }

    private static function multiply($a, $b) {
        return $a * $b;
    }
}
"#,
        );

        assert!(functions.len() >= 4);

        // Free functions keep their bare name; methods are prefixed by the class.
        for expected in [
            "hello",
            "add",
            "Calculator::__construct",
            "Calculator::add",
            "Calculator::multiply",
        ] {
            assert!(has_name(&functions, expected));
        }
    }

    #[test]
    fn test_php_classes() {
        let source = r#"
<?php
class User {
    public $name;
    private $email;

    public function __construct($name, $email) {
        $this->name = $name;
        $this->email = $email;
    }
}

interface UserInterface {
    public function getName();
}

trait Loggable {
    public function log($message) {
        echo $message;
    }
}
"#;

        let mut parser = PhpParser::new().unwrap();
        let types = parser.extract_types(source, "test.php").unwrap();

        // Declarations are reported in source order.
        let expected = [
            ("User", "class"),
            ("UserInterface", "interface"),
            ("Loggable", "trait"),
        ];
        assert_eq!(types.len(), 3);
        for (found, (name, kind)) in types.iter().zip(expected) {
            assert_eq!(found.name, name);
            assert_eq!(found.kind, kind);
        }
    }

    #[test]
    fn test_php_namespace() {
        let functions = functions_in(
            r#"
<?php
namespace App\Controllers;

function processRequest() {
    return "processed";
}

class UserController {
    public function index() {
        return "user list";
    }
}
"#,
        );

        assert!(functions.len() >= 2);

        // Only free functions carry the namespace prefix.
        assert!(has_name(&functions, "App\\Controllers\\processRequest"));
        assert!(has_name(&functions, "UserController::index"));
    }

    #[test]
    fn test_php_class_detection() {
        let functions = functions_in(
            r#"
<?php
class TestClass {
    public function method1() {
        return "test1";
    }

    public function method2() {
        return "test2";
    }
}

function standalone_function() {
    return "standalone";
}
"#,
        );

        assert_eq!(functions.len(), 3);

        let method1 = find_by(&functions, |name| name.contains("method1"));
        let method2 = find_by(&functions, |name| name.contains("method2"));
        let standalone = find_by(&functions, |name| name == "standalone_function");

        for method in [method1, method2] {
            assert_eq!(method.class_name, Some("TestClass".to_string()));
            assert!(method.is_method);
        }

        assert_eq!(standalone.class_name, None);
        assert!(!standalone.is_method);
    }
}