1use similarity_core::language_parser::{
2 GenericFunctionDef, GenericTypeDef, Language, LanguageParser,
3};
4use similarity_core::tree::TreeNode;
5use std::error::Error;
6use std::rc::Rc;
7use tree_sitter::{Node, Parser};
8
9pub struct PhpParser {
10 parser: Parser,
11}
12
13impl PhpParser {
14 #[allow(dead_code)]
15 pub fn new() -> Result<Self, Box<dyn Error + Send + Sync>> {
16 let mut parser = Parser::new();
17 parser.set_language(&tree_sitter_php::LANGUAGE_PHP.into())?;
18
19 Ok(Self { parser })
20 }
21
22 #[allow(clippy::only_used_in_recursion)]
23 fn convert_node(&self, node: Node, source: &str, id_counter: &mut usize) -> TreeNode {
24 let current_id = *id_counter;
25 *id_counter += 1;
26
27 let label = node.kind().to_string();
28 let value = match node.kind() {
29 "name" | "variable_name" | "string" | "integer" | "float" | "true" | "false"
30 | "null" => node.utf8_text(source.as_bytes()).unwrap_or("").to_string(),
31 _ => "".to_string(),
32 };
33
34 let mut tree_node = TreeNode::new(label, value, current_id);
35
36 for child in node.children(&mut node.walk()) {
37 let child_node = self.convert_node(child, source, id_counter);
38 tree_node.add_child(Rc::new(child_node));
39 }
40
41 tree_node
42 }
43
44 fn extract_functions_from_node(
45 &self,
46 node: Node,
47 source: &str,
48 class_name: Option<&str>,
49 namespace: Option<&str>,
50 ) -> Vec<GenericFunctionDef> {
51 let mut functions = Vec::new();
52
53 let mut current_namespace: Option<String> = None;
55 let mut cursor = node.walk();
56 for child in node.children(&mut cursor) {
57 if child.kind() == "namespace_definition" {
58 if let Some(name_node) = child.child_by_field_name("name") {
59 if let Ok(ns_name) = name_node.utf8_text(source.as_bytes()) {
60 current_namespace = Some(ns_name.to_string());
61 break;
62 }
63 }
64 }
65 }
66
67 fn visit_node(
68 node: Node,
69 source: &str,
70 functions: &mut Vec<GenericFunctionDef>,
71 class_name: Option<&str>,
72 namespace: Option<&str>,
73 ) {
74 match node.kind() {
75 "function_definition" => {
76 if let Some(name_node) = node.child_by_field_name("name") {
77 if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
78 let parameters_node = node.child_by_field_name("parameters");
79 let body_node = node.child_by_field_name("body");
80
81 let params = extract_params(parameters_node, source);
82 let full_name = if let Some(ns) = namespace {
83 format!("{ns}\\{name}")
84 } else {
85 name.to_string()
86 };
87
88 functions.push(GenericFunctionDef {
89 name: full_name,
90 start_line: node.start_position().row as u32 + 1,
91 end_line: node.end_position().row as u32 + 1,
92 body_start_line: body_node
93 .map(|n| n.start_position().row as u32 + 1)
94 .unwrap_or(0),
95 body_end_line: body_node
96 .map(|n| n.end_position().row as u32 + 1)
97 .unwrap_or(0),
98 parameters: params,
99 is_method: class_name.is_some(),
100 class_name: class_name.map(|s| s.to_string()),
101 is_async: false, is_generator: is_generator_function(node, source),
103 decorators: Vec::new(), });
105 }
106 }
107 }
108 "method_declaration" => {
109 if let Some(name_node) = node.child_by_field_name("name") {
110 if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
111 let parameters_node = node.child_by_field_name("parameters");
112 let body_node = node.child_by_field_name("body");
113
114 let params = extract_params(parameters_node, source);
115 let visibility = extract_visibility(node, source);
116 let is_static = is_static_method(node, source);
117 let is_abstract = is_abstract_method(node, source);
118
119 let method_name = format!("{}::{}", class_name.unwrap_or(""), name);
120
121 functions.push(GenericFunctionDef {
122 name: method_name,
123 start_line: node.start_position().row as u32 + 1,
124 end_line: node.end_position().row as u32 + 1,
125 body_start_line: body_node
126 .map(|n| n.start_position().row as u32 + 1)
127 .unwrap_or(0),
128 body_end_line: body_node
129 .map(|n| n.end_position().row as u32 + 1)
130 .unwrap_or(0),
131 parameters: params,
132 is_method: true,
133 class_name: class_name.map(|s| s.to_string()),
134 is_async: false,
135 is_generator: is_generator_function(node, source),
136 decorators: vec![
137 visibility,
138 if is_static { "static".to_string() } else { "".to_string() },
139 if is_abstract {
140 "abstract".to_string()
141 } else {
142 "".to_string()
143 },
144 ]
145 .into_iter()
146 .filter(|s| !s.is_empty())
147 .collect(),
148 });
149 }
150 }
151 }
152 "class_declaration" => {
153 if let Some(name_node) = node.child_by_field_name("name") {
154 if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
155 let mut subcursor = node.walk();
156 for child in node.children(&mut subcursor) {
157 visit_node(child, source, functions, Some(name), namespace);
158 }
159 }
160 }
161 }
162 "namespace_definition" => {
163 if let Some(name_node) = node.child_by_field_name("name") {
165 if let Ok(_ns_name) = name_node.utf8_text(source.as_bytes()) {
166 }
168 }
169 }
170 "namespace_use_declaration" => {
171 }
173 _ => {
174 let mut subcursor = node.walk();
175 for child in node.children(&mut subcursor) {
176 visit_node(child, source, functions, class_name, namespace);
177 }
178 }
179 }
180 }
181
182 fn is_generator_function(node: Node, source: &str) -> bool {
183 if let Some(body) = node.child_by_field_name("body") {
184 if let Ok(body_text) = body.utf8_text(source.as_bytes()) {
185 return body_text.contains("yield");
186 }
187 }
188 false
189 }
190
191 fn extract_visibility(node: Node, source: &str) -> String {
192 let mut cursor = node.walk();
193 for child in node.children(&mut cursor) {
194 if child.kind() == "visibility_modifier" {
195 if let Ok(visibility) = child.utf8_text(source.as_bytes()) {
196 return visibility.to_string();
197 }
198 }
199 }
200 "public".to_string() }
202
203 fn is_static_method(node: Node, _source: &str) -> bool {
204 let mut cursor = node.walk();
205 for child in node.children(&mut cursor) {
206 if child.kind() == "static_modifier" {
207 return true;
208 }
209 }
210 false
211 }
212
213 fn is_abstract_method(node: Node, _source: &str) -> bool {
214 let mut cursor = node.walk();
215 for child in node.children(&mut cursor) {
216 if child.kind() == "abstract_modifier" {
217 return true;
218 }
219 }
220 false
221 }
222
223 fn extract_params(params_node: Option<Node>, source: &str) -> Vec<String> {
224 if let Some(node) = params_node {
225 let mut params = Vec::new();
226 let mut cursor = node.walk();
227
228 for child in node.children(&mut cursor) {
229 match child.kind() {
230 "simple_parameter" => {
231 if let Some(var_node) = child.child_by_field_name("name") {
232 if let Ok(param_text) = var_node.utf8_text(source.as_bytes()) {
233 params.push(param_text.to_string());
234 }
235 }
236 }
237 "typed_parameter" => {
238 if let Some(var_node) = child.child_by_field_name("name") {
239 if let Ok(param_text) = var_node.utf8_text(source.as_bytes()) {
240 params.push(param_text.to_string());
241 }
242 }
243 }
244 "variadic_parameter" => {
245 if let Some(var_node) = child.child_by_field_name("name") {
246 if let Ok(param_text) = var_node.utf8_text(source.as_bytes()) {
247 params.push(format!("..{param_text}"));
248 }
249 }
250 }
251 _ => {}
252 }
253 }
254
255 params
256 } else {
257 Vec::new()
258 }
259 }
260
261 let final_namespace = current_namespace.as_deref().or(namespace);
262 visit_node(node, source, &mut functions, class_name, final_namespace);
263 functions
264 }
265}
266
267impl LanguageParser for PhpParser {
268 fn parse(
269 &mut self,
270 source: &str,
271 _filename: &str,
272 ) -> Result<Rc<TreeNode>, Box<dyn Error + Send + Sync>> {
273 let tree =
274 self.parser.parse(source, None).ok_or_else(|| -> Box<dyn Error + Send + Sync> {
275 "Failed to parse PHP source".into()
276 })?;
277
278 let root_node = tree.root_node();
279 let mut id_counter = 0;
280 Ok(Rc::new(self.convert_node(root_node, source, &mut id_counter)))
281 }
282
283 fn extract_functions(
284 &mut self,
285 source: &str,
286 _filename: &str,
287 ) -> Result<Vec<GenericFunctionDef>, Box<dyn Error + Send + Sync>> {
288 let tree =
289 self.parser.parse(source, None).ok_or_else(|| -> Box<dyn Error + Send + Sync> {
290 "Failed to parse PHP source".into()
291 })?;
292
293 let root_node = tree.root_node();
294 Ok(self.extract_functions_from_node(root_node, source, None, None))
295 }
296
297 fn extract_types(
298 &mut self,
299 source: &str,
300 _filename: &str,
301 ) -> Result<Vec<GenericTypeDef>, Box<dyn Error + Send + Sync>> {
302 let tree =
303 self.parser.parse(source, None).ok_or_else(|| -> Box<dyn Error + Send + Sync> {
304 "Failed to parse PHP source".into()
305 })?;
306
307 let root_node = tree.root_node();
308 let mut types = Vec::new();
309
310 fn visit_node_for_types(node: Node, source: &str, types: &mut Vec<GenericTypeDef>) {
311 match node.kind() {
312 "class_declaration" => {
313 if let Some(name_node) = node.child_by_field_name("name") {
314 if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
315 types.push(GenericTypeDef {
316 name: name.to_string(),
317 kind: "class".to_string(),
318 start_line: node.start_position().row as u32 + 1,
319 end_line: node.end_position().row as u32 + 1,
320 fields: extract_class_properties(node, source),
321 });
322 }
323 }
324 }
325 "interface_declaration" => {
326 if let Some(name_node) = node.child_by_field_name("name") {
327 if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
328 types.push(GenericTypeDef {
329 name: name.to_string(),
330 kind: "interface".to_string(),
331 start_line: node.start_position().row as u32 + 1,
332 end_line: node.end_position().row as u32 + 1,
333 fields: extract_interface_methods(node, source),
334 });
335 }
336 }
337 }
338 "trait_declaration" => {
339 if let Some(name_node) = node.child_by_field_name("name") {
340 if let Ok(name) = name_node.utf8_text(source.as_bytes()) {
341 types.push(GenericTypeDef {
342 name: name.to_string(),
343 kind: "trait".to_string(),
344 start_line: node.start_position().row as u32 + 1,
345 end_line: node.end_position().row as u32 + 1,
346 fields: extract_trait_methods(node, source),
347 });
348 }
349 }
350 }
351 _ => {}
352 }
353
354 let mut cursor = node.walk();
355 for child in node.children(&mut cursor) {
356 visit_node_for_types(child, source, types);
357 }
358 }
359
360 fn extract_class_properties(node: Node, source: &str) -> Vec<String> {
361 let mut properties = Vec::new();
362
363 if let Some(body) = node.child_by_field_name("body") {
364 let mut cursor = body.walk();
365 for child in body.children(&mut cursor) {
366 if child.kind() == "property_declaration" {
367 let mut prop_cursor = child.walk();
368 for prop_child in child.children(&mut prop_cursor) {
369 if prop_child.kind() == "variable_name" {
370 if let Ok(prop_name) = prop_child.utf8_text(source.as_bytes()) {
371 properties.push(prop_name.to_string());
372 }
373 }
374 }
375 }
376 }
377 }
378
379 properties
380 }
381
382 fn extract_interface_methods(node: Node, source: &str) -> Vec<String> {
383 let mut methods = Vec::new();
384
385 if let Some(body) = node.child_by_field_name("body") {
386 let mut cursor = body.walk();
387 for child in body.children(&mut cursor) {
388 if child.kind() == "method_declaration" {
389 if let Some(name_node) = child.child_by_field_name("name") {
390 if let Ok(method_name) = name_node.utf8_text(source.as_bytes()) {
391 methods.push(method_name.to_string());
392 }
393 }
394 }
395 }
396 }
397
398 methods
399 }
400
401 fn extract_trait_methods(node: Node, source: &str) -> Vec<String> {
402 let mut methods = Vec::new();
403
404 if let Some(body) = node.child_by_field_name("body") {
405 let mut cursor = body.walk();
406 for child in body.children(&mut cursor) {
407 if child.kind() == "method_declaration" {
408 if let Some(name_node) = child.child_by_field_name("name") {
409 if let Ok(method_name) = name_node.utf8_text(source.as_bytes()) {
410 methods.push(method_name.to_string());
411 }
412 }
413 }
414 }
415 }
416
417 methods
418 }
419
420 visit_node_for_types(root_node, source, &mut types);
421 Ok(types)
422 }
423
424 fn language(&self) -> Language {
425 Language::Php
426 }
427}
428
#[cfg(test)]
mod tests {
    use super::*;

    /// Free functions and class methods are both reported, with methods
    /// qualified by their class name.
    #[test]
    fn test_php_functions() {
        let php = r#"
<?php
function hello($name) {
    return "Hello, " . $name . "!";
}

function add($a, $b = 0) {
    return $a + $b;
}

class Calculator {
    public function __construct() {
        $this->result = 0;
    }

    public function add($x) {
        $this->result += $x;
        return $this->result;
    }

    private static function multiply($a, $b) {
        return $a * $b;
    }
}
"#;

        let mut php_parser = PhpParser::new().unwrap();
        let extracted = php_parser.extract_functions(php, "test.php").unwrap();
        assert!(extracted.len() >= 4);

        let has_name = |wanted: &str| extracted.iter().any(|def| def.name == wanted);
        assert!(has_name("hello"));
        assert!(has_name("add"));
        assert!(has_name("Calculator::__construct"));
        assert!(has_name("Calculator::add"));
        assert!(has_name("Calculator::multiply"));
    }

    /// Classes, interfaces and traits are reported in document order with the
    /// matching kind label.
    #[test]
    fn test_php_classes() {
        let php = r#"
<?php
class User {
    public $name;
    private $email;

    public function __construct($name, $email) {
        $this->name = $name;
        $this->email = $email;
    }
}

interface UserInterface {
    public function getName();
}

trait Loggable {
    public function log($message) {
        echo $message;
    }
}
"#;

        let mut php_parser = PhpParser::new().unwrap();
        let found = php_parser.extract_types(php, "test.php").unwrap();
        assert_eq!(found.len(), 3);

        let expected = [("User", "class"), ("UserInterface", "interface"), ("Loggable", "trait")];
        for (type_def, (name, kind)) in found.iter().zip(expected.iter()) {
            assert_eq!(type_def.name, *name);
            assert_eq!(type_def.kind, *kind);
        }
    }

    /// Free functions are prefixed with the file's namespace; methods are
    /// qualified by class name only.
    #[test]
    fn test_php_namespace() {
        let php = r#"
<?php
namespace App\Controllers;

function processRequest() {
    return "processed";
}

class UserController {
    public function index() {
        return "user list";
    }
}
"#;

        let mut php_parser = PhpParser::new().unwrap();
        let extracted = php_parser.extract_functions(php, "test.php").unwrap();
        assert!(extracted.len() >= 2);

        let has_name = |wanted: &str| extracted.iter().any(|def| def.name == wanted);
        assert!(has_name("App\\Controllers\\processRequest"));
        assert!(has_name("UserController::index"));
    }

    /// Methods carry their class name and the method flag; free functions
    /// carry neither.
    #[test]
    fn test_php_class_detection() {
        let php = r#"
<?php
class TestClass {
    public function method1() {
        return "test1";
    }

    public function method2() {
        return "test2";
    }
}

function standalone_function() {
    return "standalone";
}
"#;

        let mut php_parser = PhpParser::new().unwrap();
        let extracted = php_parser.extract_functions(php, "test.php").unwrap();
        assert_eq!(extracted.len(), 3);

        let first_method = extracted.iter().find(|def| def.name.contains("method1")).unwrap();
        let second_method = extracted.iter().find(|def| def.name.contains("method2")).unwrap();
        let free_function =
            extracted.iter().find(|def| def.name == "standalone_function").unwrap();

        for method in [first_method, second_method] {
            assert_eq!(method.class_name, Some("TestClass".to_string()));
            assert!(method.is_method);
        }

        assert_eq!(free_function.class_name, None);
        assert!(!free_function.is_method);
    }
}